yash_env/
source.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Source code that is passed to the parser
18//!
19//! This module contains items representing information about source code from
20//! which ASTs originate. [`Source`] identifies the origin of source code
21//! fragments contained in [`Code`]. A [`Location`] specifies a particular
22//! character in a `Code` instance. You can use the [`pretty`] submodule to
23//! format messages describing source code locations.
24
25use crate::alias::Alias;
26use std::cell::RefCell;
27use std::num::NonZeroU64;
28use std::ops::Range;
29use std::rc::Rc;
30
31/// Origin of source code
32#[derive(Clone, Debug, Eq, PartialEq)]
33#[non_exhaustive]
34pub enum Source {
35    /// Source code of unknown origin
36    ///
37    /// Normally you should not use this value, but it may be useful for quick debugging.
38    Unknown,
39
40    /// Standard input
41    Stdin,
42
43    /// Command string specified with the `-c` option on the shell startup
44    CommandString,
45
46    /// File specified on the shell startup
47    CommandFile { path: String },
48
49    /// Alias substitution
50    ///
51    /// This applies to a code fragment that replaced another as a result of alias substitution.
52    Alias {
53        /// Position of the original word that was replaced
54        original: Location,
55        /// Definition of the alias that was substituted
56        alias: Rc<Alias>,
57    },
58
59    /// Command substitution
60    CommandSubst { original: Location },
61
62    /// Arithmetic expansion
63    Arith { original: Location },
64
65    /// Command string executed by the `eval` built-in
66    Eval { original: Location },
67
68    /// File executed by the `.` (`source`) built-in
69    DotScript {
70        /// Pathname of the file
71        name: String,
72        /// Location of the simple command that invoked the `.` built-in
73        origin: Location,
74    },
75
76    /// Trap command
77    Trap {
78        /// Trap condition name, typically the signal name
79        condition: String,
80        /// Location of the simple command that has set this trap command
81        origin: Location,
82    },
83
84    /// Value of a variable
85    VariableValue {
86        /// Variable name
87        name: String,
88    },
89
90    /// File executed during shell startup
91    InitFile { path: String },
92
93    /// Other source
94    Other {
95        /// Label that describes the source
96        label: String,
97    },
98    // TODO More Source types
99}
100
101impl Source {
102    /// Tests if this source is alias substitution for the given name.
103    ///
104    /// Returns true if `self` is `Source::Alias` with the `name` or such an
105    /// original, recursively.
106    ///
107    /// ```
108    /// // `is_alias_for` returns false for sources other than an Alias
109    /// # use yash_env::source::Source;
110    /// assert_eq!(Source::Unknown.is_alias_for("foo"), false);
111    /// ```
112    ///
113    /// ```
114    /// // `is_alias_for` returns true if the names match
115    /// # use yash_env::source::*;
116    /// let original = Location::dummy("");
117    /// let alias = std::rc::Rc::new(yash_env::alias::Alias {
118    ///     name: "foo".to_string(),
119    ///     replacement: "".to_string(),
120    ///     global: false,
121    ///     origin: original.clone()
122    /// });
123    /// let source = Source::Alias { original, alias };
124    /// assert_eq!(source.is_alias_for("foo"), true);
125    /// assert_eq!(source.is_alias_for("bar"), false);
126    /// ```
127    ///
128    /// ```
129    /// // `is_alias_for` checks aliases recursively.
130    /// # use std::rc::Rc;
131    /// # use yash_env::source::*;
132    /// let original = Location::dummy("");
133    /// let alias = Rc::new(yash_env::alias::Alias {
134    ///     name: "foo".to_string(),
135    ///     replacement: "".to_string(),
136    ///     global: false,
137    ///     origin: original.clone(),
138    /// });
139    /// let source = Source::Alias { original, alias };
140    /// let alias = Rc::new(yash_env::alias::Alias {
141    ///     name: "bar".to_string(),
142    ///     replacement: "".to_string(),
143    ///     global: false,
144    ///     origin: Location::dummy(""),
145    /// });
146    /// let mut original = Location::dummy("");
147    /// Rc::make_mut(&mut original.code).source = Rc::new(source);
148    /// let source = Source::Alias { original, alias };
149    /// assert_eq!(source.is_alias_for("foo"), true);
150    /// assert_eq!(source.is_alias_for("bar"), true);
151    /// assert_eq!(source.is_alias_for("baz"), false);
152    /// ```
153    pub fn is_alias_for(&self, name: &str) -> bool {
154        if let Source::Alias { original, alias } = self {
155            alias.name == name || original.code.source.is_alias_for(name)
156        } else {
157            false
158        }
159    }
160
161    /// Returns a label that describes the source.
162    pub fn label(&self) -> &str {
163        use Source::*;
164        match self {
165            Unknown => "<?>",
166            Stdin => "<stdin>",
167            CommandString => "<command_string>",
168            CommandFile { path } => path,
169            Alias { .. } => "<alias>",
170            CommandSubst { .. } => "<command_substitution>",
171            Arith { .. } => "<arithmetic_expansion>",
172            Eval { .. } => "<eval>",
173            DotScript { name, .. } => name,
174            Trap { condition, .. } => condition,
175            VariableValue { name } => name,
176            InitFile { path } => path,
177            Other { label } => label,
178        }
179    }
180}
181
182/// Source code fragment
183///
184/// An instance of `Code` contains a block of the source code that was parsed to
185/// produce an AST.
186#[derive(Clone, Debug, Eq, PartialEq)]
187pub struct Code {
188    /// Content of the code, usually terminated by a newline
189    ///
190    /// The value is contained in a `RefCell` so that more lines can be appended
191    /// to the value as the parser reads input lines. It is not intended to be
192    /// mutably borrowed for other purposes.
193    pub value: RefCell<String>,
194
195    /// Line number of the first line of the code. Counted from 1.
196    pub start_line_number: NonZeroU64,
197
198    /// Origin of this code
199    pub source: Rc<Source>,
200}
201
202impl Code {
203    /// Computes the line number of the character at the given index.
204    ///
205    /// The index should be between 0 and `self.value.borrow().chars().count()`.
206    /// The return value is `self.start_line_number` plus the number of newlines
207    /// in `self.value` up to the character at `char_index`. If `char_index` is
208    /// out of bounds, the return value is for the last character.
209    ///
210    /// This function will panic if `self.value` has been mutually borrowed.
211    #[must_use]
212    pub fn line_number(&self, char_index: usize) -> NonZeroU64 {
213        let newlines = self
214            .value
215            .borrow()
216            .chars()
217            .take(char_index)
218            .filter(|c| *c == '\n')
219            .count()
220            .try_into()
221            .unwrap_or(u64::MAX);
222        self.start_line_number.saturating_add(newlines)
223    }
224}
225
226/// Position of source code
227#[derive(Clone, Debug, Eq, PartialEq)]
228pub struct Location {
229    /// Code that contains the character
230    pub code: Rc<Code>,
231
232    /// Character position in the code, counted from 0
233    ///
234    /// Characters are counted in the number of Unicode scalar values, not
235    /// bytes. That means the index should be between 0 and
236    /// `code.value.borrow().chars().count()`.
237    pub range: Range<usize>,
238}
239
240impl Location {
241    /// Creates a dummy location.
242    ///
243    /// The returned location has [unknown](Source::Unknown) source and the
244    /// given source code value. The `start_line_number` will be 1.
245    /// The location ranges over the whole code.
246    ///
247    /// This function is mainly for use in testing.
248    #[inline]
249    pub fn dummy<S: Into<String>>(value: S) -> Location {
250        fn with_line(value: String) -> Location {
251            let range = 0..value.chars().count();
252            let code = Rc::new(Code {
253                value: RefCell::new(value),
254                start_line_number: NonZeroU64::new(1).unwrap(),
255                source: Rc::new(Source::Unknown),
256            });
257            Location { code, range }
258        }
259        with_line(value.into())
260    }
261
262    /// Returns the byte range corresponding to this location's character range.
263    pub fn byte_range(&self) -> Range<usize> {
264        let s = self.code.value.borrow();
265        let mut chars = s.char_indices();
266        let start = chars
267            .nth(self.range.start)
268            .map(|(i, _)| i)
269            .unwrap_or(s.len());
270        let end = if self.range.is_empty() {
271            start
272        } else {
273            chars
274                .nth(self.range.end - self.range.start - 1)
275                .map(|(i, _)| i)
276                .unwrap_or(s.len())
277        };
278        start..end
279    }
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Code` of unknown source with the given text and first line
    /// number.
    fn code_from(text: &str, first_line: u64) -> Code {
        Code {
            value: RefCell::new(text.to_string()),
            start_line_number: NonZeroU64::new(first_line).unwrap(),
            source: Rc::new(Source::Unknown),
        }
    }

    #[test]
    fn line_number() {
        // Pairs of (character index, expected offset from the first line) for
        // the text "a\nbc\nd". Indices past the end behave like the end.
        const CASES: [(usize, u64); 9] = [
            (0, 0),
            (1, 0),
            (2, 1),
            (3, 1),
            (4, 1),
            (5, 2),
            (6, 2),
            (7, 2),
            (usize::MAX, 2),
        ];

        for first_line in [1, 3] {
            let code = code_from("a\nbc\nd", first_line);
            for (char_index, offset) in CASES {
                assert_eq!(
                    code.line_number(char_index).get(),
                    first_line + offset,
                    "first line {}, character index {}",
                    first_line,
                    char_index
                );
            }
        }
    }

    #[test]
    fn byte_range() {
        // The text mixes characters of different UTF-8 lengths:
        // "aℝ🦀bc" = 'a' (1 byte) + 'ℝ' (3 bytes) + '🦀' (4 bytes) +
        //             'b' (1 byte) + 'c' (1 byte)
        let code = Rc::new(code_from("aℝ🦀bc", 1));

        // Pairs of (character range, expected byte range)
        let cases: [(Range<usize>, Range<usize>); 8] = [
            (0..1, 0..1),    // 'a', an ASCII character at the start
            (1..2, 1..4),    // 'ℝ', a 3-byte character
            (2..3, 4..8),    // '🦀', a 4-byte character
            (1..4, 1..9),    // 'ℝ🦀b', several characters of mixed width
            (2..2, 4..4),    // empty range at the position of '🦀'
            (4..5, 9..10),   // 'c', the last character
            (5..6, 10..10),  // entirely beyond the end
            (0..5, 0..10),   // the whole string
        ];

        for (char_range, expected) in cases {
            let location = Location {
                code: Rc::clone(&code),
                range: char_range.clone(),
            };
            assert_eq!(
                location.byte_range(),
                expected,
                "character range {:?}",
                char_range
            );
        }
    }
}
386
387pub mod pretty;