term_transcript/test/parser/
mod.rs

1//! SVG parsing logic.
2
3use std::{
4    borrow::Cow,
5    error::Error as StdError,
6    fmt,
7    io::{self, BufRead},
8    mem,
9    num::ParseIntError,
10    str::{self, Utf8Error},
11};
12
13use quick_xml::{
14    encoding::EncodingError,
15    events::{attributes::Attributes, Event},
16    Reader as XmlReader,
17};
18use termcolor::WriteColor;
19
20#[cfg(test)]
21mod tests;
22mod text;
23
24use self::text::TextReadingState;
25use crate::{
26    test::color_diff::ColorSpan, ExitStatus, Interaction, TermOutput, Transcript, UserInput,
27};
28
29fn map_utf8_error(err: Utf8Error) -> quick_xml::Error {
30    quick_xml::Error::Encoding(EncodingError::Utf8(err))
31}
32
/// Parsed terminal output.
///
/// The same parsed fragment is kept in several forms: as plain text, as color spans
/// over that text, and as HTML.
#[derive(Debug, Clone, Default)]
pub struct Parsed {
    /// Plain text of the fragment; exposed via `plaintext()`.
    pub(crate) plaintext: String,
    /// Color spans that `write_colorized()` applies on top of `plaintext`.
    pub(crate) color_spans: Vec<ColorSpan>,
    /// HTML of the fragment; exposed via `html()`.
    pub(crate) html: String,
}
40
41impl Parsed {
42    const DEFAULT: Self = Self {
43        plaintext: String::new(),
44        color_spans: Vec::new(),
45        html: String::new(),
46    };
47
48    /// Returns the parsed plaintext.
49    pub fn plaintext(&self) -> &str {
50        &self.plaintext
51    }
52
53    /// Writes the parsed text with coloring / styles applied.
54    ///
55    /// # Errors
56    ///
57    /// - Returns an I/O error should it occur when writing to `out`.
58    #[doc(hidden)] // makes `termcolor` dependency public, which we want to avoid so far
59    pub fn write_colorized(&self, out: &mut impl WriteColor) -> io::Result<()> {
60        ColorSpan::write_colorized(&self.color_spans, out, &self.plaintext)
61    }
62
63    /// Returns the parsed HTML.
64    pub fn html(&self) -> &str {
65        &self.html
66    }
67
68    /// Converts this parsed fragment into text for `UserInput`. This takes into account
69    /// that while the first space after prompt is inserted automatically, the further whitespace
70    /// may be significant.
71    fn into_input_text(self) -> String {
72        if self.plaintext.starts_with(' ') {
73            self.plaintext[1..].to_owned()
74        } else {
75            self.plaintext
76        }
77    }
78}
79
// The impl body is empty: nothing beyond the trait itself is provided here.
// NOTE(review): presumably this is what allows `Parsed` to be used as the output type
// in `Interaction<Parsed>` / `Transcript<Parsed>` — confirm against the trait definition.
impl TermOutput for Parsed {}
81
82impl Transcript<Parsed> {
83    /// Parses a transcript from the provided `reader`, which should point to an SVG XML tree
84    /// produced by [`Template::render()`] (possibly within a larger document).
85    ///
86    /// # Errors
87    ///
88    /// - Returns an error if the input cannot be parsed, usually because it was not produced
89    ///   by `Template::render()`.
90    ///
91    /// [`Template::render()`]: crate::svg::Template::render()
92    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
93    pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, ParseError> {
94        let mut reader = XmlReader::from_reader(reader);
95        let mut buffer = vec![];
96        let mut state = ParserState::Initialized;
97        let mut transcript = Self::new();
98        let mut open_tags = 0;
99
100        loop {
101            let event = reader.read_event_into(&mut buffer)?;
102            match &event {
103                Event::Start(_) => {
104                    open_tags += 1;
105                }
106                Event::End(_) => {
107                    open_tags -= 1;
108                    if open_tags == 0 {
109                        break;
110                    }
111                }
112                Event::Eof => break,
113                _ => { /* Do nothing. */ }
114            }
115
116            if let Some(interaction) = state.process(event)? {
117                #[cfg(feature = "tracing")]
118                tracing::debug!(
119                    ?interaction.input,
120                    interaction.output = ?interaction.output.plaintext,
121                    ?interaction.exit_status,
122                    "parsed interaction"
123                );
124                transcript.interactions.push(interaction);
125            }
126        }
127
128        match state {
129            ParserState::EncounteredContainer => Ok(transcript),
130            ParserState::EncounteredUserInput(interaction) => {
131                transcript.interactions.push(interaction);
132                Ok(transcript)
133            }
134            _ => Err(ParseError::UnexpectedEof),
135        }
136    }
137}
138
139fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
140    let mut class = None;
141    for attr in attributes {
142        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
143        if attr.key.as_ref() == b"class" {
144            class = Some(attr.value);
145        }
146    }
147    Ok(class.unwrap_or(Cow::Borrowed(b"")))
148}
149
/// Returns the first space-delimited chunk of `classes`, i.e. everything before the first
/// `b' '` byte, or the whole slice if it contains no spaces.
fn extract_base_class(classes: &[u8]) -> &[u8] {
    // `split()` always yields at least one (possibly empty) chunk, so the fallback
    // is never actually hit; it merely avoids an `unwrap()`.
    classes
        .split(|&byte| byte == b' ')
        .next()
        .unwrap_or(classes)
}
154
155fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
156    let mut exit_status = None;
157    for attr in attributes {
158        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
159        if attr.key.as_ref() == b"data-exit-status" {
160            let status =
161                str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(map_utf8_error(err)))?;
162            let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
163            exit_status = Some(ExitStatus(status));
164        }
165    }
166    Ok(exit_status)
167}
168
/// Errors that can occur during parsing SVG transcripts.
#[derive(Debug)]
#[non_exhaustive]
pub enum ParseError {
    /// Unexpected root XML tag; must be `<svg>`.
    ///
    /// Contains the (lossily UTF-8 decoded) name of the tag that was encountered instead.
    UnexpectedRoot(String),
    /// Invalid transcript container.
    ///
    /// The first `<div>` encountered after the `<svg>` tag must have both the XHTML namespace
    /// in its `xmlns` attribute and the `class` attribute equal to `container`.
    InvalidContainer,
    /// Invalid recorded exit status of an executed command, i.e. the value
    /// of the `data-exit-status` attribute could not be parsed as an integer.
    InvalidExitStatus(ParseIntError),
    /// Unexpected end of file.
    ///
    /// Returned when reading stops before the transcript container is found,
    /// or while a user input or terminal output is still being read.
    UnexpectedEof,
    /// Error parsing XML.
    ///
    /// I/O errors and invalid UTF-8 in attribute values are reported via this variant as well.
    Xml(quick_xml::Error),
}
184
185impl From<quick_xml::Error> for ParseError {
186    fn from(err: quick_xml::Error) -> Self {
187        Self::Xml(err)
188    }
189}
190
191impl From<io::Error> for ParseError {
192    fn from(err: io::Error) -> Self {
193        Self::Xml(err.into())
194    }
195}
196
197impl fmt::Display for ParseError {
198    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
199        match self {
200            Self::UnexpectedRoot(tag_name) => write!(
201                formatter,
202                "unexpected root XML tag: <{tag_name}>; expected <svg>"
203            ),
204            Self::InvalidContainer => formatter.write_str("invalid transcript container"),
205            Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
206            Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
207            Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
208        }
209    }
210}
211
212impl StdError for ParseError {
213    fn source(&self) -> Option<&(dyn StdError + 'static)> {
214        match self {
215            Self::Xml(err) => Some(err),
216            Self::InvalidExitStatus(err) => Some(err),
217            _ => None,
218        }
219    }
220}
221
/// State of reading a single user input element.
#[derive(Debug)]
struct UserInputState {
    /// Exit status copied into the `Interaction` produced once the input is read.
    exit_status: Option<ExitStatus>,
    /// Copied into the `hidden` field of the produced `UserInput`.
    is_hidden: bool,
    /// Reader that all events of the input element are fed into.
    text: TextReadingState,
    /// Prompt text, set once the end of the prompt element is reached
    /// (unless the input ends at that same point).
    prompt: Option<Cow<'static, str>>,
    /// Value of `text.open_tags()` recorded right before the start tag of the prompt element
    /// was processed; `None` until the prompt start is encountered.
    prompt_open_tags: Option<usize>,
}
230
231impl UserInputState {
232    fn new(exit_status: Option<ExitStatus>, is_hidden: bool) -> Self {
233        Self {
234            exit_status,
235            is_hidden,
236            text: TextReadingState::default(),
237            prompt: None,
238            prompt_open_tags: None,
239        }
240    }
241}
242
243impl UserInputState {
244    /// Can prompt reading be started now?
245    fn can_start_prompt(&self) -> bool {
246        self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
247    }
248
249    fn can_end_prompt(&self) -> bool {
250        self.prompt.is_none()
251            && self
252                .prompt_open_tags
253                .is_some_and(|tags| tags + 1 == self.text.open_tags())
254    }
255
256    fn process(&mut self, event: Event<'_>) -> Result<Option<Interaction<Parsed>>, ParseError> {
257        let mut is_prompt_end = false;
258        if let Event::Start(tag) = &event {
259            if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
260                // Got prompt start.
261                self.prompt_open_tags = Some(self.text.open_tags());
262            }
263        } else if let Event::End(_) = &event {
264            if self.can_end_prompt() {
265                is_prompt_end = true;
266            }
267        }
268
269        let maybe_parsed = self.text.process(event)?;
270        if is_prompt_end {
271            if let Some(parsed) = maybe_parsed {
272                // Special case: user input consists of the prompt only.
273                let input = UserInput {
274                    text: String::new(),
275                    prompt: Some(UserInput::intern_prompt(parsed.plaintext)),
276                    hidden: self.is_hidden,
277                };
278                return Ok(Some(Interaction {
279                    input,
280                    output: Parsed::default(),
281                    exit_status: self.exit_status,
282                }));
283            }
284            let text = mem::take(&mut self.text.plaintext_buffer);
285            self.prompt = Some(UserInput::intern_prompt(text));
286        }
287
288        Ok(maybe_parsed.map(|parsed| {
289            let input = UserInput {
290                text: parsed.into_input_text(),
291                prompt: self.prompt.take(),
292                hidden: self.is_hidden,
293            };
294            Interaction {
295                input,
296                output: Parsed::default(),
297                exit_status: self.exit_status,
298            }
299        }))
300    }
301}
302
/// States of the FSM for parsing SVGs.
///
/// After an output is read, the FSM returns to [`Self::EncounteredContainer`], so that
/// any number of interactions can be read in sequence.
#[derive(Debug)]
enum ParserState {
    /// Initial state; the first start tag encountered must be `<svg>`.
    Initialized,
    /// Encountered `<svg>` tag; searching for `<div class="container">`.
    EncounteredSvgTag,
    /// Encountered `<div class="container">`; searching for the start of a user input,
    /// i.e. an element with the first class equal to `input` or `user-input`.
    EncounteredContainer,
    /// Reading user input (contents of the input element).
    ReadingUserInput(UserInputState),
    /// Finished reading user input; searching for the start of terminal output
    /// (an element with the first class equal to `output` or `term-output`),
    /// or for the start of the next user input.
    ///
    /// Holds the interaction with the input filled in and the output still empty.
    EncounteredUserInput(Interaction<Parsed>),
    /// Reading terminal output (contents of the output element).
    ///
    /// Holds the interaction the output belongs to and the state of reading the output.
    ReadingTermOutput(Interaction<Parsed>, TextReadingState),
}
319
320impl ParserState {
321    const DUMMY_INTERACTION: Interaction<Parsed> = Interaction {
322        input: UserInput {
323            text: String::new(),
324            prompt: None,
325            hidden: false,
326        },
327        output: Parsed::DEFAULT,
328        exit_status: None,
329    };
330
331    #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
332    fn set_state(&mut self, new_state: Self) {
333        *self = new_state;
334    }
335
336    #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
337    fn process(&mut self, event: Event<'_>) -> Result<Option<Interaction<Parsed>>, ParseError> {
338        match self {
339            Self::Initialized => {
340                if let Event::Start(tag) = event {
341                    if tag.name().as_ref() == b"svg" {
342                        *self = Self::EncounteredSvgTag;
343                    } else {
344                        let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
345                        return Err(ParseError::UnexpectedRoot(tag_name));
346                    }
347                }
348            }
349
350            Self::EncounteredSvgTag => {
351                if let Event::Start(tag) = event {
352                    if tag.name().as_ref() == b"div" {
353                        Self::verify_container_attrs(tag.attributes())?;
354                        self.set_state(Self::EncounteredContainer);
355                    }
356                }
357            }
358
359            Self::EncounteredContainer => {
360                if let Event::Start(tag) = event {
361                    let classes = parse_classes(tag.attributes())?;
362                    if Self::is_input_class(extract_base_class(&classes)) {
363                        let is_hidden = classes
364                            .split(|byte| *byte == b' ')
365                            .any(|chunk| chunk == b"input-hidden");
366                        let exit_status = parse_exit_status(tag.attributes())?;
367                        self.set_state(Self::ReadingUserInput(UserInputState::new(
368                            exit_status,
369                            is_hidden,
370                        )));
371                    }
372                }
373            }
374
375            Self::ReadingUserInput(state) => {
376                if let Some(interaction) = state.process(event)? {
377                    self.set_state(Self::EncounteredUserInput(interaction));
378                }
379            }
380
381            Self::EncounteredUserInput(interaction) => {
382                if let Event::Start(tag) = event {
383                    let classes = parse_classes(tag.attributes())?;
384                    let base_class = extract_base_class(&classes);
385
386                    if Self::is_output_class(base_class) {
387                        let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
388                        self.set_state(Self::ReadingTermOutput(
389                            interaction,
390                            TextReadingState::default(),
391                        ));
392                    } else if Self::is_input_class(base_class) {
393                        let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
394                        let exit_status = parse_exit_status(tag.attributes())?;
395                        let is_hidden = classes
396                            .split(|byte| *byte == b' ')
397                            .any(|chunk| chunk == b"input-hidden");
398                        self.set_state(Self::ReadingUserInput(UserInputState::new(
399                            exit_status,
400                            is_hidden,
401                        )));
402                        return Ok(Some(interaction));
403                    }
404                }
405            }
406
407            Self::ReadingTermOutput(interaction, text_state) => {
408                if let Some(term_output) = text_state.process(event)? {
409                    let mut interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
410                    interaction.output = term_output;
411                    self.set_state(Self::EncounteredContainer);
412                    return Ok(Some(interaction));
413                }
414            }
415        }
416        Ok(None)
417    }
418
419    fn is_input_class(class_name: &[u8]) -> bool {
420        class_name == b"input" || class_name == b"user-input"
421    }
422
423    fn is_output_class(class_name: &[u8]) -> bool {
424        class_name == b"output" || class_name == b"term-output"
425    }
426
427    #[cfg_attr(
428        feature = "tracing",
429        tracing::instrument(level = "debug", skip_all, err)
430    )]
431    fn verify_container_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
432        const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
433
434        let mut has_ns_attribute = false;
435        let mut has_class_attribute = false;
436
437        for attr in attributes {
438            let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
439            match attr.key.as_ref() {
440                b"xmlns" => {
441                    if attr.value.as_ref() != HTML_NS {
442                        return Err(ParseError::InvalidContainer);
443                    }
444                    has_ns_attribute = true;
445                }
446                b"class" => {
447                    if attr.value.as_ref() != b"container" {
448                        return Err(ParseError::InvalidContainer);
449                    }
450                    has_class_attribute = true;
451                }
452                _ => { /* Do nothing. */ }
453            }
454        }
455
456        if has_ns_attribute && has_class_attribute {
457            Ok(())
458        } else {
459            Err(ParseError::InvalidContainer)
460        }
461    }
462}