yash_syntax/
source.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Source code that is passed to the parser
18//!
19//! This module contains items representing information about the source code
20//! from which ASTs originate. [`Source`] identifies the origin of source code
//! fragments contained in [`Code`]. A [`Location`] specifies a range of
//! characters in a `Code` instance. You can use the [`pretty`] submodule to
//! format messages describing source code locations.
24
25pub mod pretty;
26
27use crate::alias::Alias;
28use std::cell::RefCell;
29use std::num::NonZeroU64;
30use std::ops::Range;
31use std::rc::Rc;
32
33/// Origin of source code
34#[derive(Clone, Debug, Eq, PartialEq)]
35#[non_exhaustive]
36pub enum Source {
37    /// Source code of unknown origin
38    ///
39    /// Normally you should not use this value, but it may be useful for quick debugging.
40    Unknown,
41
42    /// Standard input
43    Stdin,
44
45    /// Command string specified with the `-c` option on the shell startup
46    CommandString,
47
48    /// File specified on the shell startup
49    CommandFile { path: String },
50
51    /// Alias substitution
52    ///
53    /// This applies to a code fragment that replaced another as a result of alias substitution.
54    Alias {
55        /// Position of the original word that was replaced
56        original: Location,
57        /// Definition of the alias that was substituted
58        alias: Rc<Alias>,
59    },
60
61    /// Command substitution
62    CommandSubst { original: Location },
63
64    /// Arithmetic expansion
65    Arith { original: Location },
66
67    /// Command string executed by the `eval` built-in
68    Eval { original: Location },
69
70    /// File executed by the `.` (`source`) built-in
71    DotScript {
72        /// Pathname of the file
73        name: String,
74        /// Location of the simple command that invoked the `.` built-in
75        origin: Location,
76    },
77
78    /// Trap command
79    Trap {
80        /// Trap condition name, typically the signal name
81        condition: String,
82        /// Location of the simple command that has set this trap command
83        origin: Location,
84    },
85
86    /// Value of a variable
87    VariableValue {
88        /// Variable name
89        name: String,
90    },
91
92    /// File executed during shell startup
93    InitFile { path: String },
94
95    /// Other source
96    Other {
97        /// Label that describes the source
98        label: String,
99    },
100    // TODO More Source types
101}
102
103impl Source {
104    /// Tests if this source is alias substitution for the given name.
105    ///
106    /// Returns true if `self` is `Source::Alias` with the `name` or such an
107    /// original, recursively.
108    ///
109    /// ```
110    /// // `is_alias_for` returns false for sources other than an Alias
111    /// # use yash_syntax::source::Source;
112    /// assert_eq!(Source::Unknown.is_alias_for("foo"), false);
113    /// ```
114    ///
115    /// ```
116    /// // `is_alias_for` returns true if the names match
117    /// # use yash_syntax::source::*;
118    /// let original = Location::dummy("");
119    /// let alias = std::rc::Rc::new(yash_syntax::alias::Alias {
120    ///     name: "foo".to_string(),
121    ///     replacement: "".to_string(),
122    ///     global: false,
123    ///     origin: original.clone()
124    /// });
125    /// let source = Source::Alias { original, alias };
126    /// assert_eq!(source.is_alias_for("foo"), true);
127    /// assert_eq!(source.is_alias_for("bar"), false);
128    /// ```
129    ///
130    /// ```
131    /// // `is_alias_for` checks aliases recursively.
132    /// # use std::rc::Rc;
133    /// # use yash_syntax::source::*;
134    /// let original = Location::dummy("");
135    /// let alias = Rc::new(yash_syntax::alias::Alias {
136    ///     name: "foo".to_string(),
137    ///     replacement: "".to_string(),
138    ///     global: false,
139    ///     origin: original.clone(),
140    /// });
141    /// let source = Source::Alias { original, alias };
142    /// let alias = Rc::new(yash_syntax::alias::Alias {
143    ///     name: "bar".to_string(),
144    ///     replacement: "".to_string(),
145    ///     global: false,
146    ///     origin: Location::dummy(""),
147    /// });
148    /// let mut original = Location::dummy("");
149    /// Rc::make_mut(&mut original.code).source = Rc::new(source);
150    /// let source = Source::Alias { original, alias };
151    /// assert_eq!(source.is_alias_for("foo"), true);
152    /// assert_eq!(source.is_alias_for("bar"), true);
153    /// assert_eq!(source.is_alias_for("baz"), false);
154    /// ```
155    pub fn is_alias_for(&self, name: &str) -> bool {
156        if let Source::Alias { original, alias } = self {
157            alias.name == name || original.code.source.is_alias_for(name)
158        } else {
159            false
160        }
161    }
162
163    /// Returns a label that describes the source.
164    pub fn label(&self) -> &str {
165        use Source::*;
166        match self {
167            Unknown => "<?>",
168            Stdin => "<stdin>",
169            CommandString => "<command_string>",
170            CommandFile { path } => path,
171            Alias { .. } => "<alias>",
172            CommandSubst { .. } => "<command_substitution>",
173            Arith { .. } => "<arithmetic_expansion>",
174            Eval { .. } => "<eval>",
175            DotScript { name, .. } => name,
176            Trap { condition, .. } => condition,
177            VariableValue { name } => name,
178            InitFile { path } => path,
179            Other { label } => label,
180        }
181    }
182}
183
184/// Source code fragment
185///
186/// An instance of `Code` contains a block of the source code that was parsed to
187/// produce an AST.
188#[derive(Clone, Debug, Eq, PartialEq)]
189pub struct Code {
190    /// Content of the code, usually terminated by a newline
191    ///
192    /// The value is contained in a `RefCell` so that more lines can be appended
193    /// to the value as the parser reads input lines. It is not intended to be
194    /// mutably borrowed for other purposes.
195    pub value: RefCell<String>,
196
197    /// Line number of the first line of the code. Counted from 1.
198    pub start_line_number: NonZeroU64,
199
200    /// Origin of this code
201    pub source: Rc<Source>,
202}
203
204impl Code {
205    /// Computes the line number of the character at the given index.
206    ///
207    /// The index should be between 0 and `self.value.borrow().chars().count()`.
208    /// The return value is `self.start_line_number` plus the number of newlines
209    /// in `self.value` up to the character at `char_index`. If `char_index` is
210    /// out of bounds, the return value is for the last character.
211    ///
212    /// This function will panic if `self.value` has been mutually borrowed.
213    #[must_use]
214    pub fn line_number(&self, char_index: usize) -> NonZeroU64 {
215        let newlines = self
216            .value
217            .borrow()
218            .chars()
219            .take(char_index)
220            .filter(|c| *c == '\n')
221            .count()
222            .try_into()
223            .unwrap_or(u64::MAX);
224        self.start_line_number.saturating_add(newlines)
225    }
226}
227
228/// Creates an iterator of [source char](SourceChar)s from a string.
229///
230/// `index_offset` will be the index of the first source char's location.
231/// For each succeeding char, the index will be incremented by one.
232///
233/// ```
234/// # use yash_syntax::source::{Code, Source, source_chars};
235/// # use std::cell::RefCell;
236/// # use std::num::NonZeroU64;
237/// # use std::rc::Rc;
238/// let s = "abc";
239/// let code = Rc::new(Code {
240///     value: RefCell::new(s.to_string()),
241///     start_line_number: NonZeroU64::new(1).unwrap(),
242///     source: Rc::new(Source::Unknown),
243/// });
244/// let chars: Vec<_> = source_chars(s, &code, 10).collect();
245/// assert_eq!(chars[0].value, 'a');
246/// assert_eq!(chars[0].location.code, code);
247/// assert_eq!(chars[0].location.range, 10..11);
248/// assert_eq!(chars[1].value, 'b');
249/// assert_eq!(chars[1].location.code, code);
250/// assert_eq!(chars[1].location.range, 11..12);
251/// ```
252pub fn source_chars<'a>(
253    s: &'a str,
254    code: &'a Rc<Code>,
255    index_offset: usize,
256) -> impl Iterator<Item = SourceChar> + 'a {
257    s.chars().enumerate().map(move |(i, value)| SourceChar {
258        value,
259        location: Location {
260            code: Rc::clone(code),
261            range: index_offset + i..index_offset + i + 1,
262        },
263    })
264}
265
266/// Position of source code
267#[derive(Clone, Debug, Eq, PartialEq)]
268pub struct Location {
269    /// Code that contains the character
270    pub code: Rc<Code>,
271
272    /// Character position in the code, counted from 0
273    ///
274    /// Characters are counted in the number of Unicode scalar values, not
275    /// bytes. That means the index should be between 0 and
276    /// `code.value.borrow().chars().count()`.
277    pub range: Range<usize>,
278}
279
280impl Location {
281    /// Creates a dummy location.
282    ///
283    /// The returned location has [unknown](Source::Unknown) source and the
284    /// given source code value. The `start_line_number` will be 1.
285    /// The location ranges over the whole code.
286    ///
287    /// This function is mainly for use in testing.
288    #[inline]
289    pub fn dummy<S: Into<String>>(value: S) -> Location {
290        fn with_line(value: String) -> Location {
291            let range = 0..value.chars().count();
292            let code = Rc::new(Code {
293                value: RefCell::new(value),
294                start_line_number: NonZeroU64::new(1).unwrap(),
295                source: Rc::new(Source::Unknown),
296            });
297            Location { code, range }
298        }
299        with_line(value.into())
300    }
301}
302
303/// Character with source description
304#[derive(Clone, Debug, Eq, PartialEq)]
305pub struct SourceChar {
306    /// Character value
307    pub value: char,
308    /// Location of this character in source code
309    pub location: Location,
310}
311
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn line_number() {
        // Pairs of a character index in "a\nbc\nd" and the number of lines
        // by which the result is expected to exceed the start line number.
        // Indices past the end behave like the last character.
        const CASES: [(usize, u64); 9] = [
            (0, 0),
            (1, 0),
            (2, 1),
            (3, 1),
            (4, 1),
            (5, 2),
            (6, 2),
            (7, 2),
            (usize::MAX, 2),
        ];

        // The same expectations must hold relative to any start line number.
        for &first_line in &[1_u64, 3] {
            let code = Code {
                value: RefCell::new("a\nbc\nd".to_string()),
                start_line_number: NonZeroU64::new(first_line).unwrap(),
                source: Rc::new(Source::Unknown),
            };
            for &(index, extra_lines) in &CASES {
                assert_eq!(
                    code.line_number(index).get(),
                    first_line + extra_lines,
                    "index {} with start line {}",
                    index,
                    first_line
                );
            }
        }
    }
}