Skip to main content

yash_builtin/read/
input.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2023 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Reading input
18
19use thiserror::Error;
20use yash_env::Env;
21use yash_env::io::Fd;
22use yash_env::prompt::GetPrompt;
23use yash_env::semantics::expansion::attr::AttrChar;
24use yash_env::semantics::expansion::attr::Origin;
25use yash_env::source::pretty::{Report, ReportType};
26use yash_env::system::{Errno, Fcntl, Isatty, Read, Write};
27
28/// Error reading from the standard input
29///
30/// This error is returned by [`read`] when an error occurs while reading from
31/// the standard input.
32#[derive(Clone, Debug, Eq, Error, PartialEq)]
33#[error("error reading from the standard input: {errno}")]
34pub struct Error {
35    #[from]
36    pub errno: Errno,
37}
38
39impl Error {
40    /// Converts this error to a report.
41    #[must_use]
42    pub fn to_report(&self) -> Report<'_> {
43        let mut report = Report::new();
44        report.r#type = ReportType::Error;
45        report.title = self.to_string().into();
46        report
47    }
48}
49
50impl<'a> From<&'a Error> for Report<'a> {
51    #[inline]
52    fn from(error: &'a Error) -> Self {
53        error.to_report()
54    }
55}
56
57fn quoted(value: char) -> AttrChar {
58    AttrChar {
59        value,
60        origin: Origin::SoftExpansion,
61        is_quoted: true,
62        is_quoting: false,
63    }
64}
65
66fn quoting(value: char) -> AttrChar {
67    AttrChar {
68        value,
69        origin: Origin::SoftExpansion,
70        is_quoted: false,
71        is_quoting: true,
72    }
73}
74
75fn plain(value: char) -> AttrChar {
76    AttrChar {
77        value,
78        origin: Origin::SoftExpansion,
79        is_quoted: false,
80        is_quoting: false,
81    }
82}
83
84/// Reads a line from the standard input.
85///
86/// This function reads a line from the standard input and returns a vector of
87/// [`AttrChar`]s representing the line. The line is terminated by the specified
88/// `delimiter` byte, which is not included in the returned vector.
89///
90/// If `is_raw` is `true`, the read line is not subject to backslash processing.
91/// Otherwise, backslash-newline pairs are treated as line continuations, and
92/// other backslashes are treated as quoting characters. On encountering a line
93/// continuation, this function removes the backslash-newline pair and continues
94/// reading the next line. When reading the second and subsequent lines, this
95/// function displays the value of the `PS2` variable as a prompt if the shell
96/// is interactive and the input is from a terminal. This requires a
97/// [`GetPrompt`] instance to be available in the environment's
98/// [`any`](Env::any) storage.
99///
100/// If successful, this function returns a vector of [`AttrChar`]s representing
101/// the line read and a boolean value indicating whether the line was terminated
102/// by a delimiter. If the end of the input is reached before finding a
103/// delimiter, the boolean value is `false`.
104pub async fn read<S>(
105    env: &mut Env<S>,
106    delimiter: u8,
107    is_raw: bool,
108) -> Result<(Vec<AttrChar>, bool), Error>
109where
110    S: Fcntl + Isatty + Read + Write + 'static,
111{
112    let mut result = Vec::new();
113
114    let newline_found = loop {
115        // TODO Read in bulk if the standard input is seekable
116        match read_char(env).await? {
117            None => break false,
118            Some(c) if c == delimiter.into() => break true,
119
120            // Backslash escape
121            Some('\\') if !is_raw => {
122                let c = read_char(env).await?;
123                if c == Some('\n') {
124                    // Line continuation
125                    print_prompt(env).await;
126                    continue;
127                }
128                result.push(quoting('\\'));
129                match c {
130                    None => break false,
131                    Some(c) => result.push(quoted(c)),
132                }
133            }
134
135            // Plain character
136            Some(c) => result.push(plain(c)),
137        }
138    };
139
140    Ok((result, newline_found))
141}
142
143/// Reads one character from the standard input.
144///
145/// This function reads a single UTF-8-encoded character from the standard
146/// input. If the standard input is empty, this function returns `Ok(None)`.
147/// If the input is not a valid UTF-8 sequence, this function returns an error.
148async fn read_char<S>(env: &mut Env<S>) -> Result<Option<char>, Error>
149where
150    S: Fcntl + Isatty + Read + Write,
151{
152    // Any character is at most 4 bytes in UTF-8.
153    let mut buffer = [0; 4];
154    let mut len = 0;
155    loop {
156        // Read from the standard input byte by byte so that we don't consume
157        // more than one character.
158        let byte = std::slice::from_mut(&mut buffer[len]);
159        let count = env.system.read(Fd::STDIN, byte).await?;
160        if count == 0 {
161            // End of input
162            return if len == 0 {
163                Ok(None)
164            } else {
165                // The input ended in the middle of a UTF-8 sequence.
166                Err(Errno::EILSEQ.into())
167            };
168        }
169        debug_assert_eq!(count, 1);
170        len += 1;
171
172        match std::str::from_utf8(&buffer[..len]) {
173            Ok(s) => {
174                let mut chars = s.chars();
175                // Since the buffer is not empty, there must be a character.
176                let c = chars.next().unwrap();
177                // And it must be the only character.
178                debug_assert_eq!(chars.next(), None);
179                return Ok(Some(c));
180            }
181            Err(e) => match e.error_len() {
182                None => {
183                    // The bytes in the buffer are incomplete for a UTF-8
184                    // character. Read more bytes.
185                    continue;
186                }
187                Some(_) => return Err(Errno::EILSEQ.into()),
188            },
189        }
190    }
191}
192
193/// Prints the prompt string for the continuation line.
194///
195/// This function prints the value of the `PS2` variable as a prompt for the
196/// continuation line. If the shell is not interactive or the standard input
197/// is not a terminal, this function does nothing.
198///
199/// This function requires a [`GetPrompt`] instance to be in the environment's
200/// [`any`](Env::any) storage. If no such instance is found, this function
201/// **panics**.
202async fn print_prompt<S>(env: &mut Env<S>)
203where
204    S: Fcntl + Isatty + Write + 'static,
205{
206    if !env.is_interactive() || !env.system.isatty(Fd::STDIN) {
207        return;
208    }
209
210    // Obtain the prompt string
211    let GetPrompt(get_prompt) = *env.any.get().expect("`GetPrompt` should be in `env.any`");
212    let mut context = yash_env::input::Context::default();
213    context.set_is_first_line(false);
214    let prompt = get_prompt(env, &context).await;
215
216    // Print the prompt
217    env.system.print_error(&prompt).await;
218}
219
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::RefCell;
    use yash_env::system::r#virtual::{FileBody, SystemState};
    use yash_env::test_helper::in_virtual_system;

    /// Replaces the body of the virtual `/dev/stdin` file with `bytes`.
    fn set_stdin<B>(system: &RefCell<SystemState>, bytes: B)
    where
        B: Into<Vec<u8>>,
    {
        let state = system.borrow_mut();
        let stdin_file = state.file_system.get("/dev/stdin").unwrap();
        stdin_file.borrow_mut().body = FileBody::new(bytes);
    }

    /// Converts a string to a vector of plain (unquoted) characters.
    fn attr_chars(s: &str) -> Vec<AttrChar> {
        let mut chars = Vec::new();
        for c in s.chars() {
            chars.push(plain(c));
        }
        chars
    }

    /// Reading from an empty input yields an unterminated empty line.
    #[test]
    fn empty_input() {
        in_virtual_system(|mut env, _| async move {
            assert_eq!(read(&mut env, b'\n', false).await, Ok((vec![], false)));
        })
    }

    /// Successive calls return successive lines, then the end of input.
    #[test]
    fn non_empty_input() {
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, "foo\nbar\n");

            let first = read(&mut env, b'\n', false).await;
            assert_eq!(first, Ok((attr_chars("foo"), true)));

            let second = read(&mut env, b'\n', false).await;
            assert_eq!(second, Ok((attr_chars("bar"), true)));

            let at_end = read(&mut env, b'\n', false).await;
            assert_eq!(at_end, Ok((vec![], false)));
        })
    }

    /// A final line without a delimiter is returned with the flag cleared.
    #[test]
    fn input_without_newline() {
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, "newline");

            let unterminated = read(&mut env, b'\n', false).await;
            assert_eq!(unterminated, Ok((attr_chars("newline"), false)));

            let at_end = read(&mut env, b'\n', false).await;
            assert_eq!(at_end, Ok((vec![], false)));
        })
    }

    /// Characters of two, three, and four bytes are decoded.
    #[test]
    fn multibyte_characters() {
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, "©⁉😀\n");

            assert_eq!(
                read(&mut env, b'\n', false).await,
                Ok((attr_chars("©⁉😀"), true)),
            );
        })
    }

    /// The nul byte can be used as the delimiter.
    #[test]
    fn nul_byte_delimiter() {
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, "foo\0bar\0");

            let first = read(&mut env, b'\0', false).await;
            assert_eq!(first, Ok((attr_chars("foo"), true)));

            let second = read(&mut env, b'\0', false).await;
            assert_eq!(second, Ok((attr_chars("bar"), true)));

            let at_end = read(&mut env, b'\0', false).await;
            assert_eq!(at_end, Ok((vec![], false)));
        })
    }

    /// With a non-newline delimiter, newlines are ordinary characters.
    #[test]
    fn alphabetic_delimiter() {
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, "foo\nbar\n");

            let first = read(&mut env, b'a', false).await;
            assert_eq!(first, Ok((attr_chars("foo\nb"), true)));

            let rest = read(&mut env, b'a', false).await;
            assert_eq!(rest, Ok((attr_chars("r\n"), false)));
        })
    }

    /// In raw mode, backslashes are plain characters and do not continue lines.
    #[test]
    fn raw_mode() {
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, "\\foo\\\nbar\\\nbaz\n");

            assert_eq!(
                read(&mut env, b'\n', true).await,
                Ok((attr_chars("\\foo\\"), true)),
            );
        })
    }

    /// Without raw mode, backslashes quote the next character and
    /// backslash-newline pairs join lines.
    #[test]
    fn no_raw_mode() {
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, "\\foo\\\nbar\\\nbaz\n");

            let mut expected = vec![quoting('\\'), quoted('f')];
            expected.extend(attr_chars("oobarbaz"));

            assert_eq!(read(&mut env, b'\n', false).await, Ok((expected, true)));
        })
    }

    /// A backslash at the very end of the input is kept as a quoting character.
    #[test]
    fn orphan_backslash() {
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, "foo\\");

            let mut expected = attr_chars("foo");
            expected.push(quoting('\\'));

            assert_eq!(read(&mut env, b'\n', false).await, Ok((expected, false)));
        })
    }

    /// Invalid and truncated UTF-8 sequences are reported as `EILSEQ`.
    #[test]
    fn broken_utf8() {
        // A byte that can never start a character
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, *b"\xFF");

            assert_eq!(
                read(&mut env, b'\n', false).await,
                Err(Errno::EILSEQ.into()),
            );
        });

        // A leading byte followed by a non-continuation byte
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, *b"\xCF\xD0");

            assert_eq!(
                read(&mut env, b'\n', false).await,
                Err(Errno::EILSEQ.into()),
            );
        });

        // A leading byte followed by the end of input
        in_virtual_system(|mut env, system| async move {
            set_stdin(&system, *b"\xCF");

            assert_eq!(
                read(&mut env, b'\n', false).await,
                Err(Errno::EILSEQ.into()),
            );
        });
    }

    // TODO Test PS2 prompt
}