term_transcript/test/parser/
mod.rs1use quick_xml::{
4 events::{attributes::Attributes, Event},
5 Reader as XmlReader,
6};
7use termcolor::WriteColor;
8
9use std::{
10 borrow::Cow,
11 error::Error as StdError,
12 fmt,
13 io::{self, BufRead},
14 mem,
15 num::ParseIntError,
16 str,
17};
18
19#[cfg(test)]
20mod tests;
21mod text;
22
23use self::text::TextReadingState;
24use crate::{
25 test::color_diff::ColorSpan, ExitStatus, Interaction, TermOutput, Transcript, UserInput,
26};
27
28#[derive(Debug, Clone, Default)]
30pub struct Parsed {
31 pub(crate) plaintext: String,
32 pub(crate) color_spans: Vec<ColorSpan>,
33 pub(crate) html: String,
34}
35
36impl Parsed {
37 const DEFAULT: Self = Self {
38 plaintext: String::new(),
39 color_spans: Vec::new(),
40 html: String::new(),
41 };
42
43 pub fn plaintext(&self) -> &str {
45 &self.plaintext
46 }
47
48 #[doc(hidden)] pub fn write_colorized(&self, out: &mut impl WriteColor) -> io::Result<()> {
55 ColorSpan::write_colorized(&self.color_spans, out, &self.plaintext)
56 }
57
58 pub fn html(&self) -> &str {
60 &self.html
61 }
62
63 fn into_input_text(self) -> String {
67 if self.plaintext.starts_with(' ') {
68 self.plaintext[1..].to_owned()
69 } else {
70 self.plaintext
71 }
72 }
73}
74
75impl TermOutput for Parsed {}
76
77impl Transcript<Parsed> {
78 #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
88 pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, ParseError> {
89 let mut reader = XmlReader::from_reader(reader);
90 let mut buffer = vec![];
91 let mut state = ParserState::Initialized;
92 let mut transcript = Self::new();
93 let mut open_tags = 0;
94
95 loop {
96 let event = reader.read_event_into(&mut buffer)?;
97 match &event {
98 Event::Start(_) => {
99 open_tags += 1;
100 }
101 Event::End(_) => {
102 open_tags -= 1;
103 if open_tags == 0 {
104 break;
105 }
106 }
107 Event::Eof => break,
108 _ => { }
109 }
110
111 if let Some(interaction) = state.process(event)? {
112 #[cfg(feature = "tracing")]
113 tracing::debug!(
114 ?interaction.input,
115 interaction.output = ?interaction.output.plaintext,
116 ?interaction.exit_status,
117 "parsed interaction"
118 );
119 transcript.interactions.push(interaction);
120 }
121 }
122
123 match state {
124 ParserState::EncounteredContainer => Ok(transcript),
125 ParserState::EncounteredUserInput(interaction) => {
126 transcript.interactions.push(interaction);
127 Ok(transcript)
128 }
129 _ => Err(ParseError::UnexpectedEof),
130 }
131 }
132}
133
134fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
135 let mut class = None;
136 for attr in attributes {
137 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
138 if attr.key.as_ref() == b"class" {
139 class = Some(attr.value);
140 }
141 }
142 Ok(class.unwrap_or(Cow::Borrowed(b"")))
143}
144
/// Extracts the base (first) class from the raw value of a `class` attribute.
///
/// Class names are separated by ASCII whitespace (space, tab, line feed, form feed,
/// carriage return); leading whitespace is skipped. Returns an empty slice if the value
/// contains no class names.
fn extract_base_class(classes: &[u8]) -> &[u8] {
    classes
        .split(u8::is_ascii_whitespace)
        // Leading / repeated separators produce empty tokens; skip them.
        .find(|token| !token.is_empty())
        .unwrap_or(&[])
}
149
150fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
151 let mut exit_status = None;
152 for attr in attributes {
153 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
154 if attr.key.as_ref() == b"data-exit-status" {
155 let status = str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(err.into()))?;
156 let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
157 exit_status = Some(ExitStatus(status));
158 }
159 }
160 Ok(exit_status)
161}
162
163#[derive(Debug)]
165#[non_exhaustive]
166pub enum ParseError {
167 UnexpectedRoot(String),
169 InvalidContainer,
171 InvalidExitStatus(ParseIntError),
173 UnexpectedEof,
175 Xml(quick_xml::Error),
177}
178
179impl From<quick_xml::Error> for ParseError {
180 fn from(err: quick_xml::Error) -> Self {
181 Self::Xml(err)
182 }
183}
184
185impl From<io::Error> for ParseError {
186 fn from(err: io::Error) -> Self {
187 Self::Xml(err.into())
188 }
189}
190
191impl fmt::Display for ParseError {
192 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
193 match self {
194 Self::UnexpectedRoot(tag_name) => write!(
195 formatter,
196 "unexpected root XML tag: <{tag_name}>; expected <svg>"
197 ),
198 Self::InvalidContainer => formatter.write_str("invalid transcript container"),
199 Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
200 Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
201 Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
202 }
203 }
204}
205
206impl StdError for ParseError {
207 fn source(&self) -> Option<&(dyn StdError + 'static)> {
208 match self {
209 Self::Xml(err) => Some(err),
210 Self::InvalidExitStatus(err) => Some(err),
211 _ => None,
212 }
213 }
214}
215
216#[derive(Debug)]
217struct UserInputState {
218 exit_status: Option<ExitStatus>,
219 text: TextReadingState,
220 prompt: Option<Cow<'static, str>>,
221 prompt_open_tags: Option<usize>,
222}
223
224impl UserInputState {
225 fn new(exit_status: Option<ExitStatus>) -> Self {
226 Self {
227 exit_status,
228 text: TextReadingState::default(),
229 prompt: None,
230 prompt_open_tags: None,
231 }
232 }
233}
234
235impl UserInputState {
236 fn can_start_prompt(&self) -> bool {
238 self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
239 }
240
241 fn can_end_prompt(&self) -> bool {
242 self.prompt.is_none()
243 && self
244 .prompt_open_tags
245 .map_or(false, |tags| tags + 1 == self.text.open_tags())
246 }
247
248 fn process(&mut self, event: Event<'_>) -> Result<Option<Interaction<Parsed>>, ParseError> {
249 let mut is_prompt_end = false;
250 if let Event::Start(tag) = &event {
251 if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
252 self.prompt_open_tags = Some(self.text.open_tags());
254 }
255 } else if let Event::End(_) = &event {
256 if self.can_end_prompt() {
257 is_prompt_end = true;
258 }
259 }
260
261 let maybe_parsed = self.text.process(event)?;
262 if is_prompt_end {
263 if let Some(parsed) = maybe_parsed {
264 let input = UserInput {
266 text: String::new(),
267 prompt: Some(UserInput::intern_prompt(parsed.plaintext)),
268 hidden: false,
269 };
270 return Ok(Some(Interaction {
271 input,
272 output: Parsed::default(),
273 exit_status: self.exit_status,
274 }));
275 }
276 let text = mem::take(&mut self.text.plaintext_buffer);
277 self.prompt = Some(UserInput::intern_prompt(text));
278 }
279
280 Ok(maybe_parsed.map(|parsed| {
281 let input = UserInput {
282 text: parsed.into_input_text(),
283 prompt: self.prompt.take(),
284 hidden: false,
285 };
286 Interaction {
287 input,
288 output: Parsed::default(),
289 exit_status: self.exit_status,
290 }
291 }))
292 }
293}
294
295#[derive(Debug)]
297enum ParserState {
298 Initialized,
300 EncounteredSvgTag,
302 EncounteredContainer,
304 ReadingUserInput(UserInputState),
306 EncounteredUserInput(Interaction<Parsed>),
308 ReadingTermOutput(Interaction<Parsed>, TextReadingState),
310}
311
312impl ParserState {
313 const DUMMY_INTERACTION: Interaction<Parsed> = Interaction {
314 input: UserInput {
315 text: String::new(),
316 prompt: None,
317 hidden: false,
318 },
319 output: Parsed::DEFAULT,
320 exit_status: None,
321 };
322
323 #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
324 fn set_state(&mut self, new_state: Self) {
325 *self = new_state;
326 }
327
328 #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
329 fn process(&mut self, event: Event<'_>) -> Result<Option<Interaction<Parsed>>, ParseError> {
330 match self {
331 Self::Initialized => {
332 if let Event::Start(tag) = event {
333 if tag.name().as_ref() == b"svg" {
334 *self = Self::EncounteredSvgTag;
335 } else {
336 let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
337 return Err(ParseError::UnexpectedRoot(tag_name));
338 }
339 }
340 }
341
342 Self::EncounteredSvgTag => {
343 if let Event::Start(tag) = event {
344 if tag.name().as_ref() == b"div" {
345 Self::verify_container_attrs(tag.attributes())?;
346 self.set_state(Self::EncounteredContainer);
347 }
348 }
349 }
350
351 Self::EncounteredContainer => {
352 if let Event::Start(tag) = event {
353 let classes = parse_classes(tag.attributes())?;
354 if Self::is_input_class(extract_base_class(&classes)) {
355 let exit_status = parse_exit_status(tag.attributes())?;
356 self.set_state(Self::ReadingUserInput(UserInputState::new(exit_status)));
357 }
358 }
359 }
360
361 Self::ReadingUserInput(state) => {
362 if let Some(interaction) = state.process(event)? {
363 self.set_state(Self::EncounteredUserInput(interaction));
364 }
365 }
366
367 Self::EncounteredUserInput(interaction) => {
368 if let Event::Start(tag) = event {
369 let classes = parse_classes(tag.attributes())?;
370 let base_class = extract_base_class(&classes);
371
372 if Self::is_output_class(base_class) {
373 let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
374 self.set_state(Self::ReadingTermOutput(
375 interaction,
376 TextReadingState::default(),
377 ));
378 } else if Self::is_input_class(base_class) {
379 let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
380 let exit_status = parse_exit_status(tag.attributes())?;
381 self.set_state(Self::ReadingUserInput(UserInputState::new(exit_status)));
382 return Ok(Some(interaction));
383 }
384 }
385 }
386
387 Self::ReadingTermOutput(interaction, text_state) => {
388 if let Some(term_output) = text_state.process(event)? {
389 let mut interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
390 interaction.output = term_output;
391 self.set_state(Self::EncounteredContainer);
392 return Ok(Some(interaction));
393 }
394 }
395 }
396 Ok(None)
397 }
398
399 fn is_input_class(class_name: &[u8]) -> bool {
400 class_name == b"input" || class_name == b"user-input"
401 }
402
403 fn is_output_class(class_name: &[u8]) -> bool {
404 class_name == b"output" || class_name == b"term-output"
405 }
406
407 #[cfg_attr(
408 feature = "tracing",
409 tracing::instrument(level = "debug", skip_all, err)
410 )]
411 fn verify_container_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
412 const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
413
414 let mut has_ns_attribute = false;
415 let mut has_class_attribute = false;
416
417 for attr in attributes {
418 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
419 match attr.key.as_ref() {
420 b"xmlns" => {
421 if attr.value.as_ref() != HTML_NS {
422 return Err(ParseError::InvalidContainer);
423 }
424 has_ns_attribute = true;
425 }
426 b"class" => {
427 if attr.value.as_ref() != b"container" {
428 return Err(ParseError::InvalidContainer);
429 }
430 has_class_attribute = true;
431 }
432 _ => { }
433 }
434 }
435
436 if has_ns_attribute && has_class_attribute {
437 Ok(())
438 } else {
439 Err(ParseError::InvalidContainer)
440 }
441 }
442}