term_transcript/test/parser/
mod.rs1use std::{
4 borrow::Cow,
5 error::Error as StdError,
6 fmt,
7 io::{self, BufRead},
8 mem,
9 num::ParseIntError,
10 str::{self, Utf8Error},
11};
12
13use quick_xml::{
14 encoding::EncodingError,
15 events::{attributes::Attributes, Event},
16 Reader as XmlReader,
17};
18use termcolor::WriteColor;
19
20#[cfg(test)]
21mod tests;
22mod text;
23
24use self::text::TextReadingState;
25use crate::{
26 test::color_diff::ColorSpan, ExitStatus, Interaction, TermOutput, Transcript, UserInput,
27};
28
29fn map_utf8_error(err: Utf8Error) -> quick_xml::Error {
30 quick_xml::Error::Encoding(EncodingError::Utf8(err))
31}
32
33#[derive(Debug, Clone, Default)]
35pub struct Parsed {
36 pub(crate) plaintext: String,
37 pub(crate) color_spans: Vec<ColorSpan>,
38 pub(crate) html: String,
39}
40
41impl Parsed {
42 const DEFAULT: Self = Self {
43 plaintext: String::new(),
44 color_spans: Vec::new(),
45 html: String::new(),
46 };
47
48 pub fn plaintext(&self) -> &str {
50 &self.plaintext
51 }
52
53 #[doc(hidden)] pub fn write_colorized(&self, out: &mut impl WriteColor) -> io::Result<()> {
60 ColorSpan::write_colorized(&self.color_spans, out, &self.plaintext)
61 }
62
63 pub fn html(&self) -> &str {
65 &self.html
66 }
67
68 fn into_input_text(self) -> String {
72 if self.plaintext.starts_with(' ') {
73 self.plaintext[1..].to_owned()
74 } else {
75 self.plaintext
76 }
77 }
78}
79
80impl TermOutput for Parsed {}
81
82impl Transcript<Parsed> {
83 #[cfg_attr(feature = "tracing", tracing::instrument(skip_all, err))]
93 pub fn from_svg<R: BufRead>(reader: R) -> Result<Self, ParseError> {
94 let mut reader = XmlReader::from_reader(reader);
95 let mut buffer = vec![];
96 let mut state = ParserState::Initialized;
97 let mut transcript = Self::new();
98 let mut open_tags = 0;
99
100 loop {
101 let event = reader.read_event_into(&mut buffer)?;
102 match &event {
103 Event::Start(_) => {
104 open_tags += 1;
105 }
106 Event::End(_) => {
107 open_tags -= 1;
108 if open_tags == 0 {
109 break;
110 }
111 }
112 Event::Eof => break,
113 _ => { }
114 }
115
116 if let Some(interaction) = state.process(event)? {
117 #[cfg(feature = "tracing")]
118 tracing::debug!(
119 ?interaction.input,
120 interaction.output = ?interaction.output.plaintext,
121 ?interaction.exit_status,
122 "parsed interaction"
123 );
124 transcript.interactions.push(interaction);
125 }
126 }
127
128 match state {
129 ParserState::EncounteredContainer => Ok(transcript),
130 ParserState::EncounteredUserInput(interaction) => {
131 transcript.interactions.push(interaction);
132 Ok(transcript)
133 }
134 _ => Err(ParseError::UnexpectedEof),
135 }
136 }
137}
138
139fn parse_classes(attributes: Attributes<'_>) -> Result<Cow<'_, [u8]>, ParseError> {
140 let mut class = None;
141 for attr in attributes {
142 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
143 if attr.key.as_ref() == b"class" {
144 class = Some(attr.value);
145 }
146 }
147 Ok(class.unwrap_or(Cow::Borrowed(b"")))
148}
149
/// Returns the first class of a space-separated class list, i.e. all bytes before the first
/// ASCII space. If there is no space, the whole input is returned; if the input starts with
/// a space, the result is empty.
fn extract_base_class(classes: &[u8]) -> &[u8] {
    // `split` always yields at least one (possibly empty) chunk, so the fallback is never
    // actually taken; it merely avoids an `unwrap`.
    classes
        .split(|&ch| ch == b' ')
        .next()
        .unwrap_or(classes)
}
154
155fn parse_exit_status(attributes: Attributes<'_>) -> Result<Option<ExitStatus>, ParseError> {
156 let mut exit_status = None;
157 for attr in attributes {
158 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
159 if attr.key.as_ref() == b"data-exit-status" {
160 let status =
161 str::from_utf8(&attr.value).map_err(|err| ParseError::Xml(map_utf8_error(err)))?;
162 let status = status.parse().map_err(ParseError::InvalidExitStatus)?;
163 exit_status = Some(ExitStatus(status));
164 }
165 }
166 Ok(exit_status)
167}
168
169#[derive(Debug)]
171#[non_exhaustive]
172pub enum ParseError {
173 UnexpectedRoot(String),
175 InvalidContainer,
177 InvalidExitStatus(ParseIntError),
179 UnexpectedEof,
181 Xml(quick_xml::Error),
183}
184
185impl From<quick_xml::Error> for ParseError {
186 fn from(err: quick_xml::Error) -> Self {
187 Self::Xml(err)
188 }
189}
190
191impl From<io::Error> for ParseError {
192 fn from(err: io::Error) -> Self {
193 Self::Xml(err.into())
194 }
195}
196
197impl fmt::Display for ParseError {
198 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
199 match self {
200 Self::UnexpectedRoot(tag_name) => write!(
201 formatter,
202 "unexpected root XML tag: <{tag_name}>; expected <svg>"
203 ),
204 Self::InvalidContainer => formatter.write_str("invalid transcript container"),
205 Self::InvalidExitStatus(err) => write!(formatter, "invalid exit status: {err}"),
206 Self::UnexpectedEof => formatter.write_str("unexpected EOF"),
207 Self::Xml(err) => write!(formatter, "error parsing XML: {err}"),
208 }
209 }
210}
211
212impl StdError for ParseError {
213 fn source(&self) -> Option<&(dyn StdError + 'static)> {
214 match self {
215 Self::Xml(err) => Some(err),
216 Self::InvalidExitStatus(err) => Some(err),
217 _ => None,
218 }
219 }
220}
221
222#[derive(Debug)]
223struct UserInputState {
224 exit_status: Option<ExitStatus>,
225 is_hidden: bool,
226 text: TextReadingState,
227 prompt: Option<Cow<'static, str>>,
228 prompt_open_tags: Option<usize>,
229}
230
231impl UserInputState {
232 fn new(exit_status: Option<ExitStatus>, is_hidden: bool) -> Self {
233 Self {
234 exit_status,
235 is_hidden,
236 text: TextReadingState::default(),
237 prompt: None,
238 prompt_open_tags: None,
239 }
240 }
241}
242
243impl UserInputState {
244 fn can_start_prompt(&self) -> bool {
246 self.text.is_empty() && self.prompt.is_none() && self.prompt_open_tags.is_none()
247 }
248
249 fn can_end_prompt(&self) -> bool {
250 self.prompt.is_none()
251 && self
252 .prompt_open_tags
253 .is_some_and(|tags| tags + 1 == self.text.open_tags())
254 }
255
256 fn process(&mut self, event: Event<'_>) -> Result<Option<Interaction<Parsed>>, ParseError> {
257 let mut is_prompt_end = false;
258 if let Event::Start(tag) = &event {
259 if self.can_start_prompt() && parse_classes(tag.attributes())?.as_ref() == b"prompt" {
260 self.prompt_open_tags = Some(self.text.open_tags());
262 }
263 } else if let Event::End(_) = &event {
264 if self.can_end_prompt() {
265 is_prompt_end = true;
266 }
267 }
268
269 let maybe_parsed = self.text.process(event)?;
270 if is_prompt_end {
271 if let Some(parsed) = maybe_parsed {
272 let input = UserInput {
274 text: String::new(),
275 prompt: Some(UserInput::intern_prompt(parsed.plaintext)),
276 hidden: self.is_hidden,
277 };
278 return Ok(Some(Interaction {
279 input,
280 output: Parsed::default(),
281 exit_status: self.exit_status,
282 }));
283 }
284 let text = mem::take(&mut self.text.plaintext_buffer);
285 self.prompt = Some(UserInput::intern_prompt(text));
286 }
287
288 Ok(maybe_parsed.map(|parsed| {
289 let input = UserInput {
290 text: parsed.into_input_text(),
291 prompt: self.prompt.take(),
292 hidden: self.is_hidden,
293 };
294 Interaction {
295 input,
296 output: Parsed::default(),
297 exit_status: self.exit_status,
298 }
299 }))
300 }
301}
302
303#[derive(Debug)]
305enum ParserState {
306 Initialized,
308 EncounteredSvgTag,
310 EncounteredContainer,
312 ReadingUserInput(UserInputState),
314 EncounteredUserInput(Interaction<Parsed>),
316 ReadingTermOutput(Interaction<Parsed>, TextReadingState),
318}
319
320impl ParserState {
321 const DUMMY_INTERACTION: Interaction<Parsed> = Interaction {
322 input: UserInput {
323 text: String::new(),
324 prompt: None,
325 hidden: false,
326 },
327 output: Parsed::DEFAULT,
328 exit_status: None,
329 };
330
331 #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
332 fn set_state(&mut self, new_state: Self) {
333 *self = new_state;
334 }
335
336 #[cfg_attr(feature = "tracing", tracing::instrument(level = "trace", err))]
337 fn process(&mut self, event: Event<'_>) -> Result<Option<Interaction<Parsed>>, ParseError> {
338 match self {
339 Self::Initialized => {
340 if let Event::Start(tag) = event {
341 if tag.name().as_ref() == b"svg" {
342 *self = Self::EncounteredSvgTag;
343 } else {
344 let tag_name = String::from_utf8_lossy(tag.name().as_ref()).into_owned();
345 return Err(ParseError::UnexpectedRoot(tag_name));
346 }
347 }
348 }
349
350 Self::EncounteredSvgTag => {
351 if let Event::Start(tag) = event {
352 if tag.name().as_ref() == b"div" {
353 Self::verify_container_attrs(tag.attributes())?;
354 self.set_state(Self::EncounteredContainer);
355 }
356 }
357 }
358
359 Self::EncounteredContainer => {
360 if let Event::Start(tag) = event {
361 let classes = parse_classes(tag.attributes())?;
362 if Self::is_input_class(extract_base_class(&classes)) {
363 let is_hidden = classes
364 .split(|byte| *byte == b' ')
365 .any(|chunk| chunk == b"input-hidden");
366 let exit_status = parse_exit_status(tag.attributes())?;
367 self.set_state(Self::ReadingUserInput(UserInputState::new(
368 exit_status,
369 is_hidden,
370 )));
371 }
372 }
373 }
374
375 Self::ReadingUserInput(state) => {
376 if let Some(interaction) = state.process(event)? {
377 self.set_state(Self::EncounteredUserInput(interaction));
378 }
379 }
380
381 Self::EncounteredUserInput(interaction) => {
382 if let Event::Start(tag) = event {
383 let classes = parse_classes(tag.attributes())?;
384 let base_class = extract_base_class(&classes);
385
386 if Self::is_output_class(base_class) {
387 let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
388 self.set_state(Self::ReadingTermOutput(
389 interaction,
390 TextReadingState::default(),
391 ));
392 } else if Self::is_input_class(base_class) {
393 let interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
394 let exit_status = parse_exit_status(tag.attributes())?;
395 let is_hidden = classes
396 .split(|byte| *byte == b' ')
397 .any(|chunk| chunk == b"input-hidden");
398 self.set_state(Self::ReadingUserInput(UserInputState::new(
399 exit_status,
400 is_hidden,
401 )));
402 return Ok(Some(interaction));
403 }
404 }
405 }
406
407 Self::ReadingTermOutput(interaction, text_state) => {
408 if let Some(term_output) = text_state.process(event)? {
409 let mut interaction = mem::replace(interaction, Self::DUMMY_INTERACTION);
410 interaction.output = term_output;
411 self.set_state(Self::EncounteredContainer);
412 return Ok(Some(interaction));
413 }
414 }
415 }
416 Ok(None)
417 }
418
419 fn is_input_class(class_name: &[u8]) -> bool {
420 class_name == b"input" || class_name == b"user-input"
421 }
422
423 fn is_output_class(class_name: &[u8]) -> bool {
424 class_name == b"output" || class_name == b"term-output"
425 }
426
427 #[cfg_attr(
428 feature = "tracing",
429 tracing::instrument(level = "debug", skip_all, err)
430 )]
431 fn verify_container_attrs(attributes: Attributes<'_>) -> Result<(), ParseError> {
432 const HTML_NS: &[u8] = b"http://www.w3.org/1999/xhtml";
433
434 let mut has_ns_attribute = false;
435 let mut has_class_attribute = false;
436
437 for attr in attributes {
438 let attr = attr.map_err(quick_xml::Error::InvalidAttr)?;
439 match attr.key.as_ref() {
440 b"xmlns" => {
441 if attr.value.as_ref() != HTML_NS {
442 return Err(ParseError::InvalidContainer);
443 }
444 has_ns_attribute = true;
445 }
446 b"class" => {
447 if attr.value.as_ref() != b"container" {
448 return Err(ParseError::InvalidContainer);
449 }
450 has_class_attribute = true;
451 }
452 _ => { }
453 }
454 }
455
456 if has_ns_attribute && has_class_attribute {
457 Ok(())
458 } else {
459 Err(ParseError::InvalidContainer)
460 }
461 }
462}