Skip to main content

yash_syntax/parser/lex/
heredoc.rs

// This file is part of yash, an extended POSIX shell.
// Copyright (C) 2021 WATANABE Yuki
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

//! Here-document content parser
18
19use super::Lexer;
20use crate::parser::core::Result;
21use crate::parser::error::Error;
22use crate::parser::error::SyntaxError;
23use crate::syntax::HereDoc;
24use crate::syntax::Text;
25use crate::syntax::TextUnit::{self, Literal};
26use crate::syntax::Unquote as _;
27
28const NEWLINE: char = '\n';
29
30/// Counts the number of leading literal tab characters in `i`.
31fn leading_tabs<'a, I: IntoIterator<Item = &'a TextUnit>>(i: I) -> usize {
32    i.into_iter()
33        .take_while(|&unit| unit == &Literal('\t'))
34        .count()
35}
36
37impl Lexer<'_> {
38    /// Reads a line literally.
39    ///
40    /// This function recognizes no quotes or expansions. Starting from the
41    /// current position, the line is read up to (but not including) the
42    /// terminating newline.
43    pub async fn line(&mut self) -> Result<String> {
44        let mut line = String::new();
45        let mut lexer = self.disable_line_continuation();
46        while let Some(c) = lexer.consume_char_if(|c| c != NEWLINE).await? {
47            line.push(c.value);
48        }
49        Lexer::enable_line_continuation(lexer);
50        Ok(line)
51    }
52
53    /// Parses the content of a here-document.
54    ///
55    /// This function reads here-document content corresponding to the
56    /// here-document operator represented by the argument and fills
57    /// `here_doc.content` with the results. The argument does not have to be
58    /// mutable because `here_doc.content` is a `RefCell`. Note that this
59    /// function will panic if `here_doc.content` has been borrowed, and that
60    /// this function keeps a borrow from `here_doc.content` until the returned
61    /// future resolves to the final result.
62    ///
63    /// In case of an error, partial results may be left in `here_doc.content`.
64    pub async fn here_doc_content(&mut self, here_doc: &HereDoc) -> Result<()> {
65        fn is_escapable(c: char) -> bool {
66            matches!(c, '$' | '`' | '\\')
67        }
68
69        let (delimiter_string, literal) = here_doc.delimiter.unquote();
70        // TODO Reject if the delimiter contains a newline
71        let mut content = Vec::new();
72        loop {
73            let (line_text, line_string) = if literal {
74                let line_string = self.line().await?;
75                let line_text = Text::from_literal_chars(line_string.chars());
76                (line_text, line_string)
77            } else {
78                let begin = self.index();
79                let line_text = self.text(|c| c == NEWLINE, is_escapable).await?;
80                let end = self.index();
81                let line_string = self.source_string(begin..end);
82                (line_text, line_string)
83            };
84
85            if !self.skip_if(|c| c == NEWLINE).await? {
86                let redir_op_location = here_doc.delimiter.location.clone();
87                let cause = SyntaxError::UnclosedHereDocContent { redir_op_location }.into();
88                let location = self.location().await?.clone();
89                return Err(Error { cause, location });
90            }
91
92            let skip_count = if here_doc.remove_tabs {
93                leading_tabs(&line_text.0)
94            } else {
95                0
96            };
97            if line_string[skip_count..] == delimiter_string {
98                break;
99            }
100
101            content.extend(line_text.0.into_iter().skip(skip_count));
102            content.push(Literal(NEWLINE));
103        }
104
105        here_doc
106            .content
107            .set(Text(content))
108            .expect("here-doc content should not be read more than once");
109        Ok(())
110    }
111}
112
#[allow(
    clippy::bool_assert_comparison,
    reason = "to make the expected values clearer"
)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::error::ErrorCause;
    use crate::source::Source;
    use crate::syntax::TextUnit::*;
    use assert_matches::assert_matches;
    use futures_util::FutureExt as _;
    use std::cell::OnceCell;

    /// Source shared by the two escape-handling tests: a line of backslash
    /// sequences ending in a backslash-newline, then `X`, then the delimiter.
    const ESCAPES_SOURCE: &str = r#"\a\$\"\'\`\\\
X
END
"#;

    /// Builds a here-document operator whose content has not been read yet.
    fn here_doc_operator(delimiter: &str, remove_tabs: bool) -> HereDoc {
        let delimiter = delimiter.parse().unwrap();
        let content = OnceCell::new();
        HereDoc {
            delimiter,
            remove_tabs,
            content,
        }
    }

    /// Reads the content for `here_doc` from `lexer`, expecting the future to
    /// complete immediately and successfully.
    #[track_caller]
    fn read_content(lexer: &mut Lexer<'_>, here_doc: &HereDoc) {
        lexer
            .here_doc_content(here_doc)
            .now_or_never()
            .unwrap()
            .unwrap();
    }

    #[test]
    fn leading_tabs_test() {
        assert_eq!(leading_tabs(std::iter::empty()), 0);
        assert_eq!(leading_tabs(&[Literal('\t'), Literal('a')]), 1);
        assert_eq!(
            leading_tabs(&[Literal('\t'), Literal('\t'), Literal('\t')]),
            3
        );
    }

    #[test]
    fn lexer_line() {
        let mut blank = Lexer::with_code("\n");
        let read = blank.line().now_or_never().unwrap().unwrap();
        assert_eq!(read, "");

        let mut lexer = Lexer::with_code("foo\n");
        let read = lexer.line().now_or_never().unwrap().unwrap();
        assert_eq!(read, "foo");
        // The terminating newline must be left unconsumed.
        let following = lexer.peek_char().now_or_never().unwrap().unwrap().unwrap();
        assert_eq!(following, '\n');
    }

    #[test]
    fn lexer_here_doc_content_empty_content() {
        let operator = here_doc_operator("END", false);
        let mut lexer = Lexer::with_code("END\nX");
        read_content(&mut lexer, &operator);

        assert_eq!(operator.delimiter.to_string(), "END");
        assert_eq!(operator.remove_tabs, false);
        assert_eq!(operator.content.get().unwrap().0, []);

        let next = lexer.location().now_or_never().unwrap().unwrap();
        assert_eq!(*next.code.value.borrow(), "END\nX");
        assert_eq!(next.code.start_line_number.get(), 1);
        assert_eq!(next.range, 4..5);
    }

    #[test]
    fn lexer_here_doc_content_one_line_content() {
        let operator = here_doc_operator("FOO", false);
        let mut lexer = Lexer::with_code("content\nFOO\nX");
        read_content(&mut lexer, &operator);

        assert_eq!(operator.delimiter.to_string(), "FOO");
        assert_eq!(operator.remove_tabs, false);
        assert_eq!(operator.content.get().unwrap().to_string(), "content\n");

        let next = lexer.location().now_or_never().unwrap().unwrap();
        assert_eq!(*next.code.value.borrow(), "content\nFOO\nX");
        assert_eq!(next.code.start_line_number.get(), 1);
        assert_eq!(next.range, 12..13);
    }

    #[test]
    fn lexer_here_doc_content_long_content() {
        let operator = here_doc_operator("BAR", false);
        let mut lexer = Lexer::with_code("foo\n\tBAR\n\nbaz\nBAR\nX");
        read_content(&mut lexer, &operator);

        assert_eq!(operator.delimiter.to_string(), "BAR");
        assert_eq!(operator.remove_tabs, false);
        let rendered = operator.content.get().unwrap().to_string();
        assert_eq!(rendered, "foo\n\tBAR\n\nbaz\n");

        let next = lexer.location().now_or_never().unwrap().unwrap();
        assert_eq!(*next.code.value.borrow(), "foo\n\tBAR\n\nbaz\nBAR\nX");
        assert_eq!(next.code.start_line_number.get(), 1);
        assert_eq!(next.range, 18..19);
    }

    #[test]
    fn lexer_here_doc_content_escapes_with_unquoted_delimiter() {
        let operator = here_doc_operator("END", false);
        let mut lexer = Lexer::with_code(ESCAPES_SOURCE);
        read_content(&mut lexer, &operator);

        let expected = [
            Literal('\\'),
            Literal('a'),
            Backslashed('$'),
            Literal('\\'),
            Literal('"'),
            Literal('\\'),
            Literal('\''),
            Backslashed('`'),
            Backslashed('\\'),
            Literal('X'),
            Literal('\n'),
        ];
        assert_eq!(operator.content.get().unwrap().0, expected);
    }

    #[test]
    fn lexer_here_doc_content_escapes_with_quoted_delimiter() {
        let operator = here_doc_operator(r"\END", false);
        let mut lexer = Lexer::with_code(ESCAPES_SOURCE);
        read_content(&mut lexer, &operator);

        let expected = [
            Literal('\\'),
            Literal('a'),
            Literal('\\'),
            Literal('$'),
            Literal('\\'),
            Literal('"'),
            Literal('\\'),
            Literal('\''),
            Literal('\\'),
            Literal('`'),
            Literal('\\'),
            Literal('\\'),
            Literal('\\'),
            Literal('\n'),
            Literal('X'),
            Literal('\n'),
        ];
        assert_eq!(operator.content.get().unwrap().0, expected);
    }

    #[test]
    fn lexer_here_doc_content_with_tabs_removed() {
        let operator = here_doc_operator("BAR", true);
        let mut lexer = Lexer::with_code("\t\t\tfoo\n\tBAR\n\nbaz\nBAR\nX");
        read_content(&mut lexer, &operator);

        assert_eq!(operator.delimiter.to_string(), "BAR");
        assert_eq!(operator.remove_tabs, true);
        assert_eq!(operator.content.get().unwrap().to_string(), "foo\n");

        let next = lexer.location().now_or_never().unwrap().unwrap();
        assert_eq!(*next.code.value.borrow(), "\t\t\tfoo\n\tBAR\n\n");
        assert_eq!(next.code.start_line_number.get(), 1);
        assert_eq!(next.range, 12..13);
    }

    #[test]
    fn lexer_here_doc_content_unclosed() {
        let operator = here_doc_operator("END", false);
        let mut lexer = Lexer::with_code("");

        let error = lexer
            .here_doc_content(&operator)
            .now_or_never()
            .unwrap()
            .unwrap_err();

        assert_matches!(error.cause,
            ErrorCause::Syntax(SyntaxError::UnclosedHereDocContent { redir_op_location }) => {
            assert_eq!(*redir_op_location.code.value.borrow(), "END");
            assert_eq!(redir_op_location.code.start_line_number.get(), 1);
            assert_eq!(*redir_op_location.code.source, Source::Unknown);
            assert_eq!(redir_op_location.range, 0..3);
        });
        assert_eq!(*error.location.code.value.borrow(), "");
        assert_eq!(error.location.code.start_line_number.get(), 1);
        assert_eq!(*error.location.code.source, Source::Unknown);
        assert_eq!(error.location.range, 0..0);
    }
}