yash_syntax/parser/lex/
heredoc.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2021 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Here-document content parser
18
19use super::Lexer;
20use crate::parser::core::Result;
21use crate::parser::error::Error;
22use crate::parser::error::SyntaxError;
23use crate::syntax::HereDoc;
24use crate::syntax::Text;
25use crate::syntax::TextUnit::{self, Literal};
26use crate::syntax::Unquote;
27
28const NEWLINE: char = '\n';
29
30/// Counts the number of leading literal tab characters in `i`.
31fn leading_tabs<'a, I: IntoIterator<Item = &'a TextUnit>>(i: I) -> usize {
32    i.into_iter()
33        .take_while(|&unit| unit == &Literal('\t'))
34        .count()
35}
36
37impl Lexer<'_> {
38    /// Reads a line literally.
39    ///
40    /// This function recognizes no quotes or expansions. Starting from the
41    /// current position, the line is read up to (but not including) the
42    /// terminating newline.
43    pub async fn line(&mut self) -> Result<String> {
44        let mut line = String::new();
45        let mut lexer = self.disable_line_continuation();
46        while let Some(c) = lexer.consume_char_if(|c| c != NEWLINE).await? {
47            line.push(c.value);
48        }
49        Lexer::enable_line_continuation(lexer);
50        Ok(line)
51    }
52
53    /// Parses the content of a here-document.
54    ///
55    /// This function reads here-document content corresponding to the
56    /// here-document operator represented by the argument and fills
57    /// `here_doc.content` with the results. The argument does not have to be
58    /// mutable because `here_doc.content` is a `RefCell`. Note that this
59    /// function will panic if `here_doc.content` has been borrowed, and that
60    /// this function keeps a borrow from `here_doc.content` until the returned
61    /// future resolves to the final result.
62    ///
63    /// In case of an error, partial results may be left in `here_doc.content`.
64    #[allow(clippy::await_holding_refcell_ref)]
65    pub async fn here_doc_content(&mut self, here_doc: &HereDoc) -> Result<()> {
66        fn is_escapable(c: char) -> bool {
67            matches!(c, '$' | '`' | '\\')
68        }
69
70        let (delimiter_string, literal) = here_doc.delimiter.unquote();
71        // TODO Reject if the delimiter contains a newline
72        let mut content = Vec::new();
73        loop {
74            let (line_text, line_string) = if literal {
75                let line_string = self.line().await?;
76                let line_text = Text::from_literal_chars(line_string.chars());
77                (line_text, line_string)
78            } else {
79                let begin = self.index();
80                let line_text = self.text(|c| c == NEWLINE, is_escapable).await?;
81                let end = self.index();
82                let line_string = self.source_string(begin..end);
83                (line_text, line_string)
84            };
85
86            if !self.skip_if(|c| c == NEWLINE).await? {
87                let redir_op_location = here_doc.delimiter.location.clone();
88                let cause = SyntaxError::UnclosedHereDocContent { redir_op_location }.into();
89                let location = self.location().await?.clone();
90                return Err(Error { cause, location });
91            }
92
93            let skip_count = if here_doc.remove_tabs {
94                leading_tabs(&line_text.0)
95            } else {
96                0
97            };
98            if line_string[skip_count..] == delimiter_string {
99                break;
100            }
101
102            content.extend(line_text.0.into_iter().skip(skip_count));
103            content.push(Literal(NEWLINE));
104        }
105
106        here_doc
107            .content
108            .set(Text(content))
109            .expect("here-doc content must be read just once");
110        Ok(())
111    }
112}
113
#[allow(clippy::bool_assert_comparison)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::error::ErrorCause;
    use crate::source::Source;
    use crate::syntax::TextUnit::*;
    use assert_matches::assert_matches;
    use futures_util::FutureExt;
    use std::cell::OnceCell;

    /// Input shared by the two escape tests: a run of backslash sequences
    /// that ends with a backslash-newline, then `X`, then the `END` line.
    const ESCAPE_SAMPLE: &str = r#"\a\$\"\'\`\\\
X
END
"#;

    /// Creates a here-document operator whose content has not been read yet.
    fn here_doc_operator(delimiter: &str, remove_tabs: bool) -> HereDoc {
        let delimiter = delimiter.parse().unwrap();
        let content = OnceCell::new();
        HereDoc {
            delimiter,
            remove_tabs,
            content,
        }
    }

    /// Runs `here_doc_content` on `lexer`, requiring the future to be ready
    /// on the first poll, and hands back its result.
    fn parse_content(lexer: &mut Lexer<'_>, here_doc: &HereDoc) -> Result<()> {
        lexer.here_doc_content(here_doc).now_or_never().unwrap()
    }

    #[test]
    fn leading_tabs_test() {
        assert_eq!(leading_tabs(std::iter::empty()), 0);
        assert_eq!(leading_tabs(&[Literal('\t'), Literal('a')]), 1);
        assert_eq!(
            leading_tabs(&[Literal('\t'), Literal('\t'), Literal('\t')]),
            3
        );
    }

    #[test]
    fn lexer_line() {
        // A line consisting of a newline only is read as an empty string.
        let mut lexer = Lexer::with_code("\n");
        assert_eq!(lexer.line().now_or_never().unwrap().unwrap(), "");

        let mut lexer = Lexer::with_code("foo\n");
        assert_eq!(lexer.line().now_or_never().unwrap().unwrap(), "foo");

        // The terminating newline must still be there to be read.
        let next = lexer.peek_char().now_or_never().unwrap().unwrap().unwrap();
        assert_eq!(next, '\n');
    }

    #[test]
    fn lexer_here_doc_content_empty_content() {
        const CODE: &str = "END\nX";
        let here_doc = here_doc_operator("END", false);
        let mut lexer = Lexer::with_code(CODE);
        parse_content(&mut lexer, &here_doc).unwrap();

        assert_eq!(here_doc.delimiter.to_string(), "END");
        assert_eq!(here_doc.remove_tabs, false);
        assert_eq!(here_doc.content.get().unwrap().0, []);

        // The lexer must have stopped just after the delimiter line.
        let next = lexer.location().now_or_never().unwrap().unwrap();
        assert_eq!(*next.code.value.borrow(), CODE);
        assert_eq!(next.code.start_line_number.get(), 1);
        assert_eq!(next.range, 4..5);
    }

    #[test]
    fn lexer_here_doc_content_one_line_content() {
        const CODE: &str = "content\nFOO\nX";
        let here_doc = here_doc_operator("FOO", false);
        let mut lexer = Lexer::with_code(CODE);
        parse_content(&mut lexer, &here_doc).unwrap();

        assert_eq!(here_doc.delimiter.to_string(), "FOO");
        assert_eq!(here_doc.remove_tabs, false);
        assert_eq!(here_doc.content.get().unwrap().to_string(), "content\n");

        // The lexer must have stopped just after the delimiter line.
        let next = lexer.location().now_or_never().unwrap().unwrap();
        assert_eq!(*next.code.value.borrow(), CODE);
        assert_eq!(next.code.start_line_number.get(), 1);
        assert_eq!(next.range, 12..13);
    }

    #[test]
    fn lexer_here_doc_content_long_content() {
        const CODE: &str = "foo\n\tBAR\n\nbaz\nBAR\nX";
        let here_doc = here_doc_operator("BAR", false);
        let mut lexer = Lexer::with_code(CODE);
        parse_content(&mut lexer, &here_doc).unwrap();

        assert_eq!(here_doc.delimiter.to_string(), "BAR");
        assert_eq!(here_doc.remove_tabs, false);
        // Without tab removal, the indented "BAR" line is ordinary content.
        assert_eq!(
            here_doc.content.get().unwrap().to_string(),
            "foo\n\tBAR\n\nbaz\n",
        );

        let next = lexer.location().now_or_never().unwrap().unwrap();
        assert_eq!(*next.code.value.borrow(), CODE);
        assert_eq!(next.code.start_line_number.get(), 1);
        assert_eq!(next.range, 18..19);
    }

    #[test]
    fn lexer_here_doc_content_escapes_with_unquoted_delimiter() {
        let here_doc = here_doc_operator("END", false);
        let mut lexer = Lexer::with_code(ESCAPE_SAMPLE);
        parse_content(&mut lexer, &here_doc).unwrap();

        // Only `$`, backquote and backslash are escaped, and the
        // backslash-newline pair leaves nothing in the content.
        let expected = [
            Literal('\\'),
            Literal('a'),
            Backslashed('$'),
            Literal('\\'),
            Literal('"'),
            Literal('\\'),
            Literal('\''),
            Backslashed('`'),
            Backslashed('\\'),
            Literal('X'),
            Literal('\n'),
        ];
        assert_eq!(here_doc.content.get().unwrap().0, expected);
    }

    #[test]
    fn lexer_here_doc_content_escapes_with_quoted_delimiter() {
        let here_doc = here_doc_operator(r"\END", false);
        let mut lexer = Lexer::with_code(ESCAPE_SAMPLE);
        parse_content(&mut lexer, &here_doc).unwrap();

        // With a quoted delimiter, every character is kept as a literal,
        // including the backslash-newline pair.
        let expected = [
            Literal('\\'),
            Literal('a'),
            Literal('\\'),
            Literal('$'),
            Literal('\\'),
            Literal('"'),
            Literal('\\'),
            Literal('\''),
            Literal('\\'),
            Literal('`'),
            Literal('\\'),
            Literal('\\'),
            Literal('\\'),
            Literal('\n'),
            Literal('X'),
            Literal('\n'),
        ];
        assert_eq!(here_doc.content.get().unwrap().0, expected);
    }

    #[test]
    fn lexer_here_doc_content_with_tabs_removed() {
        let here_doc = here_doc_operator("BAR", true);
        let mut lexer = Lexer::with_code("\t\t\tfoo\n\tBAR\n\nbaz\nBAR\nX");
        parse_content(&mut lexer, &here_doc).unwrap();

        assert_eq!(here_doc.delimiter.to_string(), "BAR");
        assert_eq!(here_doc.remove_tabs, true);
        // The indented "BAR" line already terminates the here-document.
        assert_eq!(here_doc.content.get().unwrap().to_string(), "foo\n");

        let next = lexer.location().now_or_never().unwrap().unwrap();
        assert_eq!(*next.code.value.borrow(), "\t\t\tfoo\n\tBAR\n\n");
        assert_eq!(next.code.start_line_number.get(), 1);
        assert_eq!(next.range, 12..13);
    }

    #[test]
    fn lexer_here_doc_content_unclosed() {
        let here_doc = here_doc_operator("END", false);
        let mut lexer = Lexer::with_code("");
        let error = parse_content(&mut lexer, &here_doc).unwrap_err();

        // The cause must refer back to the delimiter of the operator.
        assert_matches!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::UnclosedHereDocContent { redir_op_location }) => {
                assert_eq!(*redir_op_location.code.value.borrow(), "END");
                assert_eq!(redir_op_location.code.start_line_number.get(), 1);
                assert_eq!(*redir_op_location.code.source, Source::Unknown);
                assert_eq!(redir_op_location.range, 0..3);
            }
        );

        // The error itself is reported at the end of the (empty) input.
        assert_eq!(*error.location.code.value.borrow(), "");
        assert_eq!(error.location.code.start_line_number.get(), 1);
        assert_eq!(*error.location.code.source, Source::Unknown);
        assert_eq!(error.location.range, 0..0);
    }
}
332}