yash_env/source.rs
// This file is part of yash, an extended POSIX shell.
// Copyright (C) 2020 WATANABE Yuki
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
//! Source code that is passed to the parser
//!
//! This module contains items representing information about source code from
//! which ASTs originate. [`Source`] identifies the origin of source code
//! fragments contained in [`Code`]. A [`Location`] specifies a particular
//! range of characters in a `Code` instance. You can use the [`pretty`]
//! submodule to format messages describing source code locations.
24
25use crate::alias::Alias;
26use std::cell::RefCell;
27use std::num::NonZeroU64;
28use std::ops::Range;
29use std::rc::Rc;
30
31/// Origin of source code
32#[derive(Clone, Debug, Eq, PartialEq)]
33#[non_exhaustive]
34pub enum Source {
35 /// Source code of unknown origin
36 ///
37 /// Normally you should not use this value, but it may be useful for quick debugging.
38 Unknown,
39
40 /// Standard input
41 Stdin,
42
43 /// Command string specified with the `-c` option on the shell startup
44 CommandString,
45
46 /// File specified on the shell startup
47 CommandFile { path: String },
48
49 /// Alias substitution
50 ///
51 /// This applies to a code fragment that replaced another as a result of alias substitution.
52 Alias {
53 /// Position of the original word that was replaced
54 original: Location,
55 /// Definition of the alias that was substituted
56 alias: Rc<Alias>,
57 },
58
59 /// Command substitution
60 CommandSubst { original: Location },
61
62 /// Arithmetic expansion
63 Arith { original: Location },
64
65 /// Command string executed by the `eval` built-in
66 Eval { original: Location },
67
68 /// File executed by the `.` (`source`) built-in
69 DotScript {
70 /// Pathname of the file
71 name: String,
72 /// Location of the simple command that invoked the `.` built-in
73 origin: Location,
74 },
75
76 /// Trap command
77 Trap {
78 /// Trap condition name, typically the signal name
79 condition: String,
80 /// Location of the simple command that has set this trap command
81 origin: Location,
82 },
83
84 /// Value of a variable
85 VariableValue {
86 /// Variable name
87 name: String,
88 },
89
90 /// File executed during shell startup
91 InitFile { path: String },
92
93 /// Other source
94 Other {
95 /// Label that describes the source
96 label: String,
97 },
98 // TODO More Source types
99}
100
101impl Source {
102 /// Tests if this source is alias substitution for the given name.
103 ///
104 /// Returns true if `self` is `Source::Alias` with the `name` or such an
105 /// original, recursively.
106 ///
107 /// ```
108 /// // `is_alias_for` returns false for sources other than an Alias
109 /// # use yash_env::source::Source;
110 /// assert_eq!(Source::Unknown.is_alias_for("foo"), false);
111 /// ```
112 ///
113 /// ```
114 /// // `is_alias_for` returns true if the names match
115 /// # use yash_env::source::*;
116 /// let original = Location::dummy("");
117 /// let alias = std::rc::Rc::new(yash_env::alias::Alias {
118 /// name: "foo".to_string(),
119 /// replacement: "".to_string(),
120 /// global: false,
121 /// origin: original.clone()
122 /// });
123 /// let source = Source::Alias { original, alias };
124 /// assert_eq!(source.is_alias_for("foo"), true);
125 /// assert_eq!(source.is_alias_for("bar"), false);
126 /// ```
127 ///
128 /// ```
129 /// // `is_alias_for` checks aliases recursively.
130 /// # use std::rc::Rc;
131 /// # use yash_env::source::*;
132 /// let original = Location::dummy("");
133 /// let alias = Rc::new(yash_env::alias::Alias {
134 /// name: "foo".to_string(),
135 /// replacement: "".to_string(),
136 /// global: false,
137 /// origin: original.clone(),
138 /// });
139 /// let source = Source::Alias { original, alias };
140 /// let alias = Rc::new(yash_env::alias::Alias {
141 /// name: "bar".to_string(),
142 /// replacement: "".to_string(),
143 /// global: false,
144 /// origin: Location::dummy(""),
145 /// });
146 /// let mut original = Location::dummy("");
147 /// Rc::make_mut(&mut original.code).source = Rc::new(source);
148 /// let source = Source::Alias { original, alias };
149 /// assert_eq!(source.is_alias_for("foo"), true);
150 /// assert_eq!(source.is_alias_for("bar"), true);
151 /// assert_eq!(source.is_alias_for("baz"), false);
152 /// ```
153 pub fn is_alias_for(&self, name: &str) -> bool {
154 if let Source::Alias { original, alias } = self {
155 alias.name == name || original.code.source.is_alias_for(name)
156 } else {
157 false
158 }
159 }
160
161 /// Returns a label that describes the source.
162 pub fn label(&self) -> &str {
163 use Source::*;
164 match self {
165 Unknown => "<?>",
166 Stdin => "<stdin>",
167 CommandString => "<command_string>",
168 CommandFile { path } => path,
169 Alias { .. } => "<alias>",
170 CommandSubst { .. } => "<command_substitution>",
171 Arith { .. } => "<arithmetic_expansion>",
172 Eval { .. } => "<eval>",
173 DotScript { name, .. } => name,
174 Trap { condition, .. } => condition,
175 VariableValue { name } => name,
176 InitFile { path } => path,
177 Other { label } => label,
178 }
179 }
180}
181
182/// Source code fragment
183///
184/// An instance of `Code` contains a block of the source code that was parsed to
185/// produce an AST.
186#[derive(Clone, Debug, Eq, PartialEq)]
187pub struct Code {
188 /// Content of the code, usually terminated by a newline
189 ///
190 /// The value is contained in a `RefCell` so that more lines can be appended
191 /// to the value as the parser reads input lines. It is not intended to be
192 /// mutably borrowed for other purposes.
193 pub value: RefCell<String>,
194
195 /// Line number of the first line of the code. Counted from 1.
196 pub start_line_number: NonZeroU64,
197
198 /// Origin of this code
199 pub source: Rc<Source>,
200}
201
202impl Code {
203 /// Computes the line number of the character at the given index.
204 ///
205 /// The index should be between 0 and `self.value.borrow().chars().count()`.
206 /// The return value is `self.start_line_number` plus the number of newlines
207 /// in `self.value` up to the character at `char_index`. If `char_index` is
208 /// out of bounds, the return value is for the last character.
209 ///
210 /// This function will panic if `self.value` has been mutually borrowed.
211 #[must_use]
212 pub fn line_number(&self, char_index: usize) -> NonZeroU64 {
213 let newlines = self
214 .value
215 .borrow()
216 .chars()
217 .take(char_index)
218 .filter(|c| *c == '\n')
219 .count()
220 .try_into()
221 .unwrap_or(u64::MAX);
222 self.start_line_number.saturating_add(newlines)
223 }
224}
225
226/// Position of source code
227#[derive(Clone, Debug, Eq, PartialEq)]
228pub struct Location {
229 /// Code that contains the character
230 pub code: Rc<Code>,
231
232 /// Character position in the code, counted from 0
233 ///
234 /// Characters are counted in the number of Unicode scalar values, not
235 /// bytes. That means the index should be between 0 and
236 /// `code.value.borrow().chars().count()`.
237 pub range: Range<usize>,
238}
239
240impl Location {
241 /// Creates a dummy location.
242 ///
243 /// The returned location has [unknown](Source::Unknown) source and the
244 /// given source code value. The `start_line_number` will be 1.
245 /// The location ranges over the whole code.
246 ///
247 /// This function is mainly for use in testing.
248 #[inline]
249 pub fn dummy<S: Into<String>>(value: S) -> Location {
250 fn with_line(value: String) -> Location {
251 let range = 0..value.chars().count();
252 let code = Rc::new(Code {
253 value: RefCell::new(value),
254 start_line_number: NonZeroU64::new(1).unwrap(),
255 source: Rc::new(Source::Unknown),
256 });
257 Location { code, range }
258 }
259 with_line(value.into())
260 }
261
262 /// Returns the byte range corresponding to this location's character range.
263 pub fn byte_range(&self) -> Range<usize> {
264 let s = self.code.value.borrow();
265 let mut chars = s.char_indices();
266 let start = chars
267 .nth(self.range.start)
268 .map(|(i, _)| i)
269 .unwrap_or(s.len());
270 let end = if self.range.is_empty() {
271 start
272 } else {
273 chars
274 .nth(self.range.end - self.range.start - 1)
275 .map(|(i, _)| i)
276 .unwrap_or(s.len())
277 };
278 start..end
279 }
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Code` of unknown source starting at the given line number.
    fn code_starting_at(value: &str, start_line_number: u64) -> Code {
        Code {
            value: RefCell::new(value.to_string()),
            start_line_number: NonZeroU64::new(start_line_number).unwrap(),
            source: Rc::new(Source::Unknown),
        }
    }

    #[test]
    fn line_number() {
        // Index:  0    1   2   3    4   5
        // Char:  'a' '\n' 'b' 'c' '\n' 'd'
        let code = code_starting_at("a\nbc\nd", 1);
        assert_eq!(code.line_number(0).get(), 1);
        assert_eq!(code.line_number(1).get(), 1);
        assert_eq!(code.line_number(2).get(), 2);
        assert_eq!(code.line_number(3).get(), 2);
        assert_eq!(code.line_number(4).get(), 2);
        assert_eq!(code.line_number(5).get(), 3);
        assert_eq!(code.line_number(6).get(), 3);
        assert_eq!(code.line_number(7).get(), 3);
        assert_eq!(code.line_number(usize::MAX).get(), 3);

        let code = Code {
            start_line_number: NonZeroU64::new(3).unwrap(),
            ..code
        };
        assert_eq!(code.line_number(0).get(), 3);
        assert_eq!(code.line_number(1).get(), 3);
        assert_eq!(code.line_number(2).get(), 4);
        assert_eq!(code.line_number(3).get(), 4);
        assert_eq!(code.line_number(4).get(), 4);
        assert_eq!(code.line_number(5).get(), 5);
        assert_eq!(code.line_number(6).get(), 5);
        assert_eq!(code.line_number(7).get(), 5);
        assert_eq!(code.line_number(usize::MAX).get(), 5);
    }

    #[test]
    fn line_number_of_empty_code() {
        let code = code_starting_at("", 9);
        assert_eq!(code.line_number(0).get(), 9);
        assert_eq!(code.line_number(usize::MAX).get(), 9);
    }

    #[test]
    fn line_number_saturates() {
        let code = code_starting_at("\n\n", u64::MAX);
        assert_eq!(code.line_number(0).get(), u64::MAX);
        assert_eq!(code.line_number(2).get(), u64::MAX);
    }

    #[test]
    fn dummy_location() {
        let location = Location::dummy("aℝ\n");
        assert_eq!(*location.code.value.borrow(), "aℝ\n");
        assert_eq!(location.code.start_line_number.get(), 1);
        assert_eq!(*location.code.source, Source::Unknown);
        // The range is counted in characters, not bytes.
        assert_eq!(location.range, 0..3);
    }

    #[test]
    fn byte_range() {
        // Test with multi-byte UTF-8 characters
        // "aℝ🦀bc" = 'a' (1 byte) + 'ℝ' (3 bytes) + '🦀' (4 bytes) +
        //            'b' (1 byte) + 'c' (1 byte)
        let code = Rc::new(code_starting_at("aℝ🦀bc", 1));

        // (character range, expected byte range, description)
        let cases = [
            (0..1, 0..1, "ASCII character at start"),
            (1..2, 1..4, "3-byte character"),
            (2..3, 4..8, "4-byte character"),
            (1..4, 1..9, "multiple characters including multi-byte"),
            (2..2, 4..4, "empty range in the middle"),
            (4..5, 9..10, "last character"),
            (5..5, 10..10, "empty range at end"),
            (5..6, 10..10, "range beyond end"),
            (7..7, 10..10, "empty range beyond end"),
            (3..9, 8..10, "end beyond end"),
            (0..5, 0..10, "entire string"),
        ];
        for (char_range, expected, description) in &cases {
            let location = Location {
                code: Rc::clone(&code),
                range: char_range.clone(),
            };
            assert_eq!(location.byte_range(), *expected, "{}", description);
        }

        // A reversed range is empty and collapses onto its start.
        let location = Location {
            code: Rc::clone(&code),
            range: Range { start: 3, end: 1 },
        };
        assert_eq!(location.byte_range(), 8..8);
    }

    #[test]
    fn byte_range_of_empty_code() {
        let code = Rc::new(code_starting_at("", 1));
        let location = Location {
            code: Rc::clone(&code),
            range: 0..0,
        };
        assert_eq!(location.byte_range(), 0..0);

        let location = Location { code, range: 0..1 };
        assert_eq!(location.byte_range(), 0..0);
    }
}
386
387pub mod pretty;