term_transcript/test/parser/
mod.rs

//! SVG parsing logic.
2
3use quick_xml::{
4    events::{attributes::Attributes, Event},
5    Reader as XmlReader,
6};
7use termcolor::WriteColor;
8
9use std::{
10    borrow::Cow,
11    error::Error as StdError,
12    fmt,
13    io::{self, BufRead},
14    mem,
15    num::ParseIntError,
16    str,
17};
18
19#[cfg(test)]
20mod tests;
21mod text;
22
23use self::text::TextReadingState;
24use crate::{
25    test::color_diff::ColorSpan, ExitStatus, Interaction, TermOutput, Transcript, UserInput,
26};
27
28/// Parsed terminal output.
29#[derive(Debug, Clone, Default)]
30pub struct Parsed {
31    pub(crate) plaintext: String,
32    pub(crate) color_spans: Vec<ColorSpan>,
33    pub(crate) html: String,
34}
35
36impl Parsed {
37    const DEFAULT: Self = Self {
38        plaintext: String::new(),
39        color_spans: Vec::new(),
40        html: String::new(),
41    };
42
43    /// Returns the parsed plaintext.
44    pub fn plaintext(&self) -> &str {
45        &self.plaintext
46    }
47
48    /// Writes the parsed text with coloring / styles applied.
49    ///
50    /// # Errors
51    ///
52    /// - Returns an I/O error should it occur when writing to `out`.
53    #[doc(hidden)] // makes `termcolor` dependency public, which we want to avoid so far
54    pub fn write_colorized(&self, out: &mut impl WriteColor) -> io::Result<()> {
55        ColorSpan::write_colorized(&self.color_spans, out, &self.plaintext)
56    }
57
58    /// Returns the parsed HTML.
59    pub fn html(&self) -> &str {
60        &self.html
61    }
62
63    /// Converts this parsed fragment into text for `UserInput`. This takes into account
64    /// that while the first space after prompt is inserted automatically, the further whitespace
65    /// may be significant.
66    fn into_input_text(self) -> String {
67        if self.plaintext.starts_with(' ') {
68            self.plaintext[1..].to_owned()
69        } else {
70            self.plaintext
71        }
72    }
73}
74
75impl TermOutput for Parsed {}
76
77impl Transcript<Parsed> {
78    /// Parses a transcript from the provided `reader`, which should point to an SVG XML tree
79    /// produced by [`Template::render()`] (possibly within a larger document).
80    ///
81    /// # Errors
82    ///
83    /// - Returns an error if the input cannot be parsed, usually because it was not produced
84    ///   by `Template::render()`.
85    ///
86    /// [`Template::render()`]: crate::svg::Template::render()
87    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
88    pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, ParseError> {
89        let mut reader = XmlReader::from_reader(reader);
90        let mut buffer = vec![];
91        let mut state = ParserState::Initialized;
92        let mut transcript = Self::new();
93        let mut open_tags = 0;
94
95        loop {
96            let event = reader.read_event_into(&mut buffer)?;
97            match &event {
98                Event::Start(_) => {
99                    open_tags += 1;
100                }
101                Event::End(_) => {
102                    open_tags -= 1;
103                    if open_tags == 0 {
104                        break;
105                    }
106                }
107                Event::Eof => break,
108                _ => { /* Do nothing. */ }
109            }
110
111            if let Some(interaction) = state.process(event)? {
112                #[cfg(feature = "tracing")]
113                tracing::debug!(
114                    ?interaction.input,
115                    interaction.output = ?interaction.output.plaintext,
116                    ?interaction.exit_status,
117                    "parsed interaction"
118                );
119                transcript.interactions.push(interaction);
120            }
121        }
122
123        match state {
124            ParserState::EncounteredContainer => Ok(transcript),
125            ParserState::EncounteredUserInput(interaction) => {
126                transcript.interactions.push(interaction);
127                Ok(transcript)
128            }
129            _ => Err(ParseError::UnexpectedEof),
130        }
131    }
132}
133
134fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
135    let mut class = None;
136    for attr in attributes {
137        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
138        if attr.key.as_ref() == b"class" {
139            class = Some(attr.value);
140        }
141    }
142    Ok(class.unwrap_or(Cow::Borrowed(b"")))
143}
144
/// Extracts the base (first) class name from the raw value of a `class` attribute.
///
/// Class names are separated by ASCII whitespace (space, tab, line feed, form feed,
/// carriage return), not only by the space character; leading whitespace is skipped.
/// Returns an empty slice if the value contains no class names.
fn extract_base_class(classes: &[u8]) -> &[u8] {
    classes
        .split(u8::is_ascii_whitespace)
        // Leading / repeated separators produce empty tokens; skip them.
        .find(|token| !token.is_empty())
        .unwrap_or(&[])
}
149
150fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
151    let mut exit_status = None;
152    for attr in attributes {
153        let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
154        if attr.key.as_ref() == b"data-exit-status" {
155            let status = str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(err.into()))?;
156            let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
157            exit_status = Some(ExitStatus(status));
158        }
159    }
160    Ok(exit_status)
161}
162
163/// Errors that can occur during parsing SVG transcripts.
164#[derive(Debug)]
165#[non_exhaustive]
166pub enum ParseError {
167    /// Unexpected root XML tag; must be `<svg>`.
168    UnexpectedRoot(String),
169    /// Invalid transcript container.
170    InvalidContainer,
171    /// Invalid recorded exit status of an executed command.
172    InvalidExitStatus(ParseIntError),
173    /// Unexpected end of file.
174    UnexpectedEof,
175    /// Error parsing XML.
176    Xml(quick_xml::Error),
177}
178
179impl From<quick_xml::Error> for ParseError {
180    fn from(err: quick_xml::Error) -> Self {
181        Self::Xml(err)
182    }
183}
184
185impl From<io::Error> for ParseError {
186    fn from(err: io::Error) -> Self {
187        Self::Xml(err.into())
188    }
189}
190
191impl fmt::Display for ParseError {
192    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
193        match self {
194            Self::UnexpectedRoot(tag_name) => write!(
195                formatter,
196                "unexpected root XML tag: <{tag_name}>; expected <svg>"
197            ),
198            Self::InvalidContainer => formatter.write_str("invalid transcript container"),
199            Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
200            Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
201            Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
202        }
203    }
204}
205
206impl StdError for ParseError {
207    fn source(&self) -> Option<&(dyn StdError + 'static)> {
208        match self {
209            Self::Xml(err) => Some(err),
210            Self::InvalidExitStatus(err) => Some(err),
211            _ => None,
212        }
213    }
214}
215
216#[derive(Debug)]
217struct UserInputState {
218    exit_status: Option<ExitStatus>,
219    text: TextReadingState,
220    prompt: Option<Cow<'static, str>>,
221    prompt_open_tags: Option<usize>,
222}
223
224impl UserInputState {
225    fn new(exit_status: Option<ExitStatus>) -> Self {
226        Self {
227            exit_status,
228            text: TextReadingState::default(),
229            prompt: None,
230            prompt_open_tags: None,
231        }
232    }
233}
234
235impl UserInputState {
236    /// Can prompt reading be started now?
237    fn can_start_prompt(&self) -> bool {
238        self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
239    }
240
241    fn can_end_prompt(&self) -> bool {
242        self.prompt.is_none()
243            && self
244                .prompt_open_tags
245                .map_or(false, |tags| tags + 1 == self.text.open_tags())
246    }
247
248    fn process(&mut self, event: Event<'_>) -> Result<Option<Interaction<Parsed>>, ParseError> {
249        let mut is_prompt_end = false;
250        if let Event::Start(tag) = &event {
251            if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
252                // Got prompt start.
253                self.prompt_open_tags = Some(self.text.open_tags());
254            }
255        } else if let Event::End(_) = &event {
256            if self.can_end_prompt() {
257                is_prompt_end = true;
258            }
259        }
260
261        let maybe_parsed = self.text.process(event)?;
262        if is_prompt_end {
263            if let Some(parsed) = maybe_parsed {
264                // Special case: user input consists of the prompt only.
265                let input = UserInput {
266                    text: String::new(),
267                    prompt: Some(UserInput::intern_prompt(parsed.plaintext)),
268                    hidden: false,
269                };
270                return Ok(Some(Interaction {
271                    input,
272                    output: Parsed::default(),
273                    exit_status: self.exit_status,
274                }));
275            }
276            let text = mem::take(&mut self.text.plaintext_buffer);
277            self.prompt = Some(UserInput::intern_prompt(text));
278        }
279
280        Ok(maybe_parsed.map(|parsed| {
281            let input = UserInput {
282                text: parsed.into_input_text(),
283                prompt: self.prompt.take(),
284                hidden: false,
285            };
286            Interaction {
287                input,
288                output: Parsed::default(),
289                exit_status: self.exit_status,
290            }
291        }))
292    }
293}
294
295/// States of the FSM for parsing SVGs.
296#[derive(Debug)]
297enum ParserState {
298    /// Initial state.
299    Initialized,
300    /// Encountered `<svg>` tag; searching for `<div class="container">`.
301    EncounteredSvgTag,
302    /// Encountered `<div class="container">`; searching for `<div class="input">`.
303    EncounteredContainer,
304    /// Reading user input (`<div class="input">` contents).
305    ReadingUserInput(UserInputState),
306    /// Finished reading user input; searching for `<div class="output">`.
307    EncounteredUserInput(Interaction<Parsed>),
308    /// Reading terminal output (`<div class="output">` contents).
309    ReadingTermOutput(Interaction<Parsed>, TextReadingState),
310}
311
312impl ParserState {
313    const DUMMY_INTERACTION: Interaction<Parsed> = Interaction {
314        input: UserInput {
315            text: String::new(),
316            prompt: None,
317            hidden: false,
318        },
319        output: Parsed::DEFAULT,
320        exit_status: None,
321    };
322
323    #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
324    fn set_state(&mut self, new_state: Self) {
325        *self = new_state;
326    }
327
328    #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
329    fn process(&mut self, event: Event<'_>) -> Result<Option<Interaction<Parsed>>, ParseError> {
330        match self {
331            Self::Initialized => {
332                if let Event::Start(tag) = event {
333                    if tag.name().as_ref() == b"svg" {
334                        *self = Self::EncounteredSvgTag;
335                    } else {
336                        let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
337                        return Err(ParseError::UnexpectedRoot(tag_name));
338                    }
339                }
340            }
341
342            Self::EncounteredSvgTag => {
343                if let Event::Start(tag) = event {
344                    if tag.name().as_ref() == b"div" {
345                        Self::verify_container_attrs(tag.attributes())?;
346                        self.set_state(Self::EncounteredContainer);
347                    }
348                }
349            }
350
351            Self::EncounteredContainer => {
352                if let Event::Start(tag) = event {
353                    let classes = parse_classes(tag.attributes())?;
354                    if Self::is_input_class(extract_base_class(&classes)) {
355                        let exit_status = parse_exit_status(tag.attributes())?;
356                        self.set_state(Self::ReadingUserInput(UserInputState::new(exit_status)));
357                    }
358                }
359            }
360
361            Self::ReadingUserInput(state) => {
362                if let Some(interaction) = state.process(event)? {
363                    self.set_state(Self::EncounteredUserInput(interaction));
364                }
365            }
366
367            Self::EncounteredUserInput(interaction) => {
368                if let Event::Start(tag) = event {
369                    let classes = parse_classes(tag.attributes())?;
370                    let base_class = extract_base_class(&classes);
371
372                    if Self::is_output_class(base_class) {
373                        let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
374                        self.set_state(Self::ReadingTermOutput(
375                            interaction,
376                            TextReadingState::default(),
377                        ));
378                    } else if Self::is_input_class(base_class) {
379                        let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
380                        let exit_status = parse_exit_status(tag.attributes())?;
381                        self.set_state(Self::ReadingUserInput(UserInputState::new(exit_status)));
382                        return Ok(Some(interaction));
383                    }
384                }
385            }
386
387            Self::ReadingTermOutput(interaction, text_state) => {
388                if let Some(term_output) = text_state.process(event)? {
389                    let mut interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
390                    interaction.output = term_output;
391                    self.set_state(Self::EncounteredContainer);
392                    return Ok(Some(interaction));
393                }
394            }
395        }
396        Ok(None)
397    }
398
399    fn is_input_class(class_name: &[u8]) -> bool {
400        class_name == b"input" || class_name == b"user-input"
401    }
402
403    fn is_output_class(class_name: &[u8]) -> bool {
404        class_name == b"output" || class_name == b"term-output"
405    }
406
407    #[cfg_attr(
408        feature = "tracing",
409        tracing::instrument(level = "debug", skip_all, err)
410    )]
411    fn verify_container_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
412        const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
413
414        let mut has_ns_attribute = false;
415        let mut has_class_attribute = false;
416
417        for attr in attributes {
418            let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
419            match attr.key.as_ref() {
420                b"xmlns" => {
421                    if attr.value.as_ref() != HTML_NS {
422                        return Err(ParseError::InvalidContainer);
423                    }
424                    has_ns_attribute = true;
425                }
426                b"class" => {
427                    if attr.value.as_ref() != b"container" {
428                        return Err(ParseError::InvalidContainer);
429                    }
430                    has_class_attribute = true;
431                }
432                _ => { /* Do nothing. */ }
433            }
434        }
435
436        if has_ns_attribute && has_class_attribute {
437            Ok(())
438        } else {
439            Err(ParseError::InvalidContainer)
440        }
441    }
442}