yash_syntax/source.rs
1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17//! Source code that is passed to the parser
18//!
19//! This module contains items representing information about the source code
20//! from which ASTs originate. [`Source`] identifies the origin of source code
21//! fragments contained in [`Code`]. A [`Location`] specifies a particular
22//! character in a `Code` instance. You can use the [`pretty`] submodule to
23//! format messages describing source code locations.
24
25pub mod pretty;
26
27use crate::alias::Alias;
28use std::cell::RefCell;
29use std::num::NonZeroU64;
30use std::ops::Range;
31use std::rc::Rc;
32
33/// Origin of source code
34#[derive(Clone, Debug, Eq, PartialEq)]
35#[non_exhaustive]
36pub enum Source {
37 /// Source code of unknown origin
38 ///
39 /// Normally you should not use this value, but it may be useful for quick debugging.
40 Unknown,
41
42 /// Standard input
43 Stdin,
44
45 /// Command string specified with the `-c` option on the shell startup
46 CommandString,
47
48 /// File specified on the shell startup
49 CommandFile { path: String },
50
51 /// Alias substitution
52 ///
53 /// This applies to a code fragment that replaced another as a result of alias substitution.
54 Alias {
55 /// Position of the original word that was replaced
56 original: Location,
57 /// Definition of the alias that was substituted
58 alias: Rc<Alias>,
59 },
60
61 /// Command substitution
62 CommandSubst { original: Location },
63
64 /// Arithmetic expansion
65 Arith { original: Location },
66
67 /// Command string executed by the `eval` built-in
68 Eval { original: Location },
69
70 /// File executed by the `.` (`source`) built-in
71 DotScript {
72 /// Pathname of the file
73 name: String,
74 /// Location of the simple command that invoked the `.` built-in
75 origin: Location,
76 },
77
78 /// Trap command
79 Trap {
80 /// Trap condition name, typically the signal name
81 condition: String,
82 /// Location of the simple command that has set this trap command
83 origin: Location,
84 },
85
86 /// Value of a variable
87 VariableValue {
88 /// Variable name
89 name: String,
90 },
91
92 /// File executed during shell startup
93 InitFile { path: String },
94
95 /// Other source
96 Other {
97 /// Label that describes the source
98 label: String,
99 },
100 // TODO More Source types
101}
102
103impl Source {
104 /// Tests if this source is alias substitution for the given name.
105 ///
106 /// Returns true if `self` is `Source::Alias` with the `name` or such an
107 /// original, recursively.
108 ///
109 /// ```
110 /// // `is_alias_for` returns false for sources other than an Alias
111 /// # use yash_syntax::source::Source;
112 /// assert_eq!(Source::Unknown.is_alias_for("foo"), false);
113 /// ```
114 ///
115 /// ```
116 /// // `is_alias_for` returns true if the names match
117 /// # use yash_syntax::source::*;
118 /// let original = Location::dummy("");
119 /// let alias = std::rc::Rc::new(yash_syntax::alias::Alias {
120 /// name: "foo".to_string(),
121 /// replacement: "".to_string(),
122 /// global: false,
123 /// origin: original.clone()
124 /// });
125 /// let source = Source::Alias { original, alias };
126 /// assert_eq!(source.is_alias_for("foo"), true);
127 /// assert_eq!(source.is_alias_for("bar"), false);
128 /// ```
129 ///
130 /// ```
131 /// // `is_alias_for` checks aliases recursively.
132 /// # use std::rc::Rc;
133 /// # use yash_syntax::source::*;
134 /// let original = Location::dummy("");
135 /// let alias = Rc::new(yash_syntax::alias::Alias {
136 /// name: "foo".to_string(),
137 /// replacement: "".to_string(),
138 /// global: false,
139 /// origin: original.clone(),
140 /// });
141 /// let source = Source::Alias { original, alias };
142 /// let alias = Rc::new(yash_syntax::alias::Alias {
143 /// name: "bar".to_string(),
144 /// replacement: "".to_string(),
145 /// global: false,
146 /// origin: Location::dummy(""),
147 /// });
148 /// let mut original = Location::dummy("");
149 /// Rc::make_mut(&mut original.code).source = Rc::new(source);
150 /// let source = Source::Alias { original, alias };
151 /// assert_eq!(source.is_alias_for("foo"), true);
152 /// assert_eq!(source.is_alias_for("bar"), true);
153 /// assert_eq!(source.is_alias_for("baz"), false);
154 /// ```
155 pub fn is_alias_for(&self, name: &str) -> bool {
156 if let Source::Alias { original, alias } = self {
157 alias.name == name || original.code.source.is_alias_for(name)
158 } else {
159 false
160 }
161 }
162
163 /// Returns a label that describes the source.
164 pub fn label(&self) -> &str {
165 use Source::*;
166 match self {
167 Unknown => "<?>",
168 Stdin => "<stdin>",
169 CommandString => "<command_string>",
170 CommandFile { path } => path,
171 Alias { .. } => "<alias>",
172 CommandSubst { .. } => "<command_substitution>",
173 Arith { .. } => "<arithmetic_expansion>",
174 Eval { .. } => "<eval>",
175 DotScript { name, .. } => name,
176 Trap { condition, .. } => condition,
177 VariableValue { name } => name,
178 InitFile { path } => path,
179 Other { label } => label,
180 }
181 }
182}
183
184/// Source code fragment
185///
186/// An instance of `Code` contains a block of the source code that was parsed to
187/// produce an AST.
188#[derive(Clone, Debug, Eq, PartialEq)]
189pub struct Code {
190 /// Content of the code, usually terminated by a newline
191 ///
192 /// The value is contained in a `RefCell` so that more lines can be appended
193 /// to the value as the parser reads input lines. It is not intended to be
194 /// mutably borrowed for other purposes.
195 pub value: RefCell<String>,
196
197 /// Line number of the first line of the code. Counted from 1.
198 pub start_line_number: NonZeroU64,
199
200 /// Origin of this code
201 pub source: Rc<Source>,
202}
203
204impl Code {
205 /// Computes the line number of the character at the given index.
206 ///
207 /// The index should be between 0 and `self.value.borrow().chars().count()`.
208 /// The return value is `self.start_line_number` plus the number of newlines
209 /// in `self.value` up to the character at `char_index`. If `char_index` is
210 /// out of bounds, the return value is for the last character.
211 ///
212 /// This function will panic if `self.value` has been mutually borrowed.
213 #[must_use]
214 pub fn line_number(&self, char_index: usize) -> NonZeroU64 {
215 let newlines = self
216 .value
217 .borrow()
218 .chars()
219 .take(char_index)
220 .filter(|c| *c == '\n')
221 .count()
222 .try_into()
223 .unwrap_or(u64::MAX);
224 self.start_line_number.saturating_add(newlines)
225 }
226}
227
228/// Creates an iterator of [source char](SourceChar)s from a string.
229///
230/// `index_offset` will be the index of the first source char's location.
231/// For each succeeding char, the index will be incremented by one.
232///
233/// ```
234/// # use yash_syntax::source::{Code, Source, source_chars};
235/// # use std::cell::RefCell;
236/// # use std::num::NonZeroU64;
237/// # use std::rc::Rc;
238/// let s = "abc";
239/// let code = Rc::new(Code {
240/// value: RefCell::new(s.to_string()),
241/// start_line_number: NonZeroU64::new(1).unwrap(),
242/// source: Rc::new(Source::Unknown),
243/// });
244/// let chars: Vec<_> = source_chars(s, &code, 10).collect();
245/// assert_eq!(chars[0].value, 'a');
246/// assert_eq!(chars[0].location.code, code);
247/// assert_eq!(chars[0].location.range, 10..11);
248/// assert_eq!(chars[1].value, 'b');
249/// assert_eq!(chars[1].location.code, code);
250/// assert_eq!(chars[1].location.range, 11..12);
251/// ```
252pub fn source_chars<'a>(
253 s: &'a str,
254 code: &'a Rc<Code>,
255 index_offset: usize,
256) -> impl Iterator<Item = SourceChar> + 'a {
257 s.chars().enumerate().map(move |(i, value)| SourceChar {
258 value,
259 location: Location {
260 code: Rc::clone(code),
261 range: index_offset + i..index_offset + i + 1,
262 },
263 })
264}
265
266/// Position of source code
267#[derive(Clone, Debug, Eq, PartialEq)]
268pub struct Location {
269 /// Code that contains the character
270 pub code: Rc<Code>,
271
272 /// Character position in the code, counted from 0
273 ///
274 /// Characters are counted in the number of Unicode scalar values, not
275 /// bytes. That means the index should be between 0 and
276 /// `code.value.borrow().chars().count()`.
277 pub range: Range<usize>,
278}
279
280impl Location {
281 /// Creates a dummy location.
282 ///
283 /// The returned location has [unknown](Source::Unknown) source and the
284 /// given source code value. The `start_line_number` will be 1.
285 /// The location ranges over the whole code.
286 ///
287 /// This function is mainly for use in testing.
288 #[inline]
289 pub fn dummy<S: Into<String>>(value: S) -> Location {
290 fn with_line(value: String) -> Location {
291 let range = 0..value.chars().count();
292 let code = Rc::new(Code {
293 value: RefCell::new(value),
294 start_line_number: NonZeroU64::new(1).unwrap(),
295 source: Rc::new(Source::Unknown),
296 });
297 Location { code, range }
298 }
299 with_line(value.into())
300 }
301}
302
303/// Character with source description
304#[derive(Clone, Debug, Eq, PartialEq)]
305pub struct SourceChar {
306 /// Character value
307 pub value: char,
308 /// Location of this character in source code
309 pub location: Location,
310}
311
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `Code::line_number` on the text `"a\nbc\nd"` for every index,
    /// once with the code starting at line 1 and once starting at line 3.
    #[test]
    fn line_number() {
        // Pairs of (character index, number of newlines before that index).
        // Indices past the end of the text behave like the end of the text.
        const CASES: [(usize, u64); 9] = [
            (0, 0),
            (1, 0),
            (2, 1),
            (3, 1),
            (4, 1),
            (5, 2),
            (6, 2),
            (7, 2),
            (usize::MAX, 2),
        ];

        for first_line in [1, 3] {
            let code = Code {
                value: RefCell::new("a\nbc\nd".to_string()),
                start_line_number: NonZeroU64::new(first_line).unwrap(),
                source: Rc::new(Source::Unknown),
            };
            for (char_index, newlines) in CASES {
                assert_eq!(
                    code.line_number(char_index).get(),
                    first_line + newlines,
                    "start line {}, char index {}",
                    first_line,
                    char_index
                );
            }
        }
    }
}