term_parser/
lib.rs

1//! `term-parser` is a Rust crate for parsing [ANSI escape codes](https://en.wikipedia.org/wiki/ANSI_escape_code).
2//! The parser is based on [Paul Williams' ANSI-compatible video terminal parser](https://vt100.net/emu/dec_ansi_parser) modified to support UTF-8 input.
3//! The implementation uses a static state transition table to minimize branches.
4//!
5//! ## Usage
6//!
7//! To read escape codes, create an [`ActionIter`] from any [`std::io::Read`] and consume the [`Action`]s the iterator returns.
//! This crate comes with a "logger" example that prints a description of each action parsed from stdin.
9//! To give it a try, run the following:
10//!
11//! ```bash
//! printf '\x1b[30mhello\x1b[0m' | cargo run --example logger
13//! ```
14//!
15//! Below is the source for the logger example, which demonstrates how to read escape codes:
16//!
17//! ```rust
18//! fn main() {
19//! 	let stdin = std::io::stdin();
20//! 	let stdin = stdin.lock();
21//! 	let stdin = std::io::BufReader::new(stdin);
22//! 	let action_iter = term_parser::ActionIter::new(stdin);
23//! 	for action in action_iter {
24//! 		println!("{:?}", action);
25//! 	}
26//! }
27//! ```
28//!
29//! [`ActionIter`]: struct.ActionIter.html
30//! [`Action`]: enum.Action.html
31//! [`std::io::Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
32//!
33
34use error::ParseError;
35use std::io::Read;
36
37mod error;
38mod table;
39
40pub use error::Error;
41
/// Capacity of the `intermediates` array carried by each sequence.
const MAX_INTERMEDIATES: usize = 2;
/// Capacity of the `params` array carried by each sequence.
const MAX_PARAMS: usize = 16;

/// An action, as described in [Paul Williams' ANSI-compatible video terminal parser](https://vt100.net/emu/dec_ansi_parser).
///
/// To prevent heap allocation, intermediates and params are stored in fixed-size arrays instead of `Vec`s.
/// Only the first `intermediates_count` entries of `intermediates` and the first `params_count`
/// entries of `params` are meaningful; ignore the rest of each array.
///
/// Every payload is plain data, so `Action` is `Copy` and can be compared with `==`.
///
/// NOTE(review): `Esc` and `Hook` do not carry the byte that triggered the dispatch,
/// unlike `Csi`; adding it would be a breaking change for code that matches on these variants.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Action {
	/// A control sequence was dispatched; `byte` is the byte that triggered the dispatch.
	Csi {
		intermediates: [u8; MAX_INTERMEDIATES],
		intermediates_count: usize,
		params: [usize; MAX_PARAMS],
		params_count: usize,
		byte: u8,
	},
	/// An escape sequence was dispatched.
	Esc {
		intermediates: [u8; MAX_INTERMEDIATES],
		intermediates_count: usize,
		params: [usize; MAX_PARAMS],
		params_count: usize,
	},
	/// The given byte should be executed.
	Execute(u8),
	/// A device control string started.
	Hook {
		intermediates: [u8; MAX_INTERMEDIATES],
		intermediates_count: usize,
		params: [usize; MAX_PARAMS],
		params_count: usize,
	},
	/// An operating system command string ended.
	OscEnd(),
	/// One byte of an operating system command string.
	OscPut(u8),
	/// An operating system command string started.
	OscStart(),
	/// A decoded character to display.
	Print(char),
	/// One byte of a device control string.
	Put(u8),
	/// A device control string ended; carries the byte that ended it.
	Unhook(u8),
}
77
78/// An [`Iterator`] that returns [`Action`]s read from a [`std::io::Read`]er.
79///
80/// [`Iterator`]: https://doc.rust-lang.org/std/iter/index.html
81/// [`Action`]: enum.Action.html
82/// [`std::io::Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
83///
84pub struct ActionIter<R>
85where
86	R: Read,
87{
88	bytes: std::io::Bytes<R>,
89	intermediates: [u8; MAX_INTERMEDIATES],
90	intermediates_count: usize,
91	params: [usize; MAX_PARAMS],
92	params_count: usize,
93	state: table::State,
94	table_actions_queue: [(table::Action, u8); 3],
95	table_actions_count: usize,
96}
97
98impl<R> ActionIter<R>
99where
100	R: Read,
101{
102	/// Create a new ActionIter from a [`Read`]er.
103	///
104	/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
105	///
106	pub fn new(reader: R) -> Self {
107		ActionIter {
108			bytes: reader.bytes(),
109			intermediates: [0u8; MAX_INTERMEDIATES],
110			intermediates_count: 0,
111			params: [0usize; MAX_PARAMS],
112			params_count: 0,
113			state: table::State::Ground,
114			table_actions_queue: [(table::Action::None, 0u8); 3],
115			table_actions_count: 0,
116		}
117	}
118}
119
120impl<R: Read> Iterator for ActionIter<R> {
121	type Item = Result<Action, Error>;
122	fn next(&mut self) -> Option<Self::Item> {
123		loop {
124			// if the table action queue is empty,
125			// read the next byte and add any
126			// new table actions to the queue
127			if self.table_actions_count == 0 {
128				let byte = match self.bytes.next() {
129					None => return None,
130					Some(result) => match result {
131						Err(err) => return Some(Err(err.into())),
132						Ok(byte) => byte,
133					},
134				};
135				let old_state = self.state;
136				let (table_action, new_state) = table::query(old_state, byte);
137				self.state = new_state;
138				if new_state == table::State::Error {
139					return Some(Err(Error::Parse(ParseError {
140						state: old_state,
141						byte,
142					})));
143				} else if new_state != old_state {
144					let mut table_actions_count = 0;
145					let exit_action = table::EXIT_ACTIONS[old_state as usize];
146					if exit_action != table::Action::None {
147						self.table_actions_queue[table_actions_count] = (exit_action, byte);
148						table_actions_count += 1;
149					}
150					if table_action != table::Action::None {
151						self.table_actions_queue[table_actions_count] = (table_action, byte);
152						table_actions_count += 1;
153					}
154					let enter_action = table::ENTRY_ACTIONS[new_state as usize];
155					if enter_action != table::Action::None {
156						self.table_actions_queue[table_actions_count] = (enter_action, byte);
157						table_actions_count += 1;
158					}
159					self.table_actions_count = table_actions_count;
160				} else {
161					self.table_actions_queue[0] = (table_action, byte);
162					self.table_actions_count = 1;
163				}
164			}
165
166			// pop a table action off the queue
167			let (table_action, byte) = self.table_actions_queue[0];
168			self.table_actions_count -= 1;
169			self.table_actions_queue[0] = self.table_actions_queue[1];
170			self.table_actions_queue[1] = self.table_actions_queue[2];
171
172			// execute the table action and produce the parser action, if any
173			let action = match table_action {
174				table::Action::None => None,
175				table::Action::Clear => {
176					self.params_count = 0;
177					self.intermediates_count = 0;
178					None
179				}
180				table::Action::Collect => {
181					if self.intermediates_count < MAX_INTERMEDIATES {
182						self.intermediates[self.intermediates_count] = byte;
183						self.intermediates_count += 1;
184					}
185					None
186				}
187				table::Action::CsiDispatch => {
188					let intermediates = self.intermediates;
189					let intermediates_count = self.intermediates_count;
190					let params = self.params;
191					let params_count = self.params_count;
192					self.intermediates = [0; MAX_INTERMEDIATES];
193					self.intermediates_count = 0;
194					self.params = [0; MAX_PARAMS];
195					self.params_count = 0;
196					Some(Action::Csi {
197						intermediates,
198						intermediates_count,
199						params,
200						params_count,
201						byte,
202					})
203				}
204				table::Action::EscDispatch => {
205					let intermediates = self.intermediates;
206					let intermediates_count = self.intermediates_count;
207					let params = self.params;
208					let params_count = self.params_count;
209					self.intermediates = [0; MAX_INTERMEDIATES];
210					self.intermediates_count = 0;
211					self.params = [0; MAX_PARAMS];
212					self.params_count = 0;
213					Some(Action::Esc {
214						intermediates,
215						intermediates_count,
216						params,
217						params_count,
218					})
219				}
220				table::Action::Execute => Some(Action::Execute(byte)),
221				table::Action::Hook => {
222					let intermediates = self.intermediates;
223					let intermediates_count = self.intermediates_count;
224					let params = self.params;
225					let params_count = self.params_count;
226					self.intermediates = [0; MAX_INTERMEDIATES];
227					self.intermediates_count = 0;
228					self.params = [0; MAX_PARAMS];
229					self.params_count = 0;
230					Some(Action::Hook {
231						intermediates,
232						intermediates_count,
233						params,
234						params_count,
235					})
236				}
237				table::Action::Ignore => None,
238				table::Action::OscEnd => Some(Action::OscEnd()),
239				table::Action::OscPut => Some(Action::OscPut(byte)),
240				table::Action::OscStart => Some(Action::OscStart()),
241				table::Action::Param => {
242					if byte == b';' {
243						self.params[self.params_count] = 0;
244						self.params_count += 1;
245					} else {
246						if self.params_count == 0 {
247							self.params[self.params_count] = 0;
248							self.params_count = 1;
249						}
250						let param_index = self.params_count - 1;
251						self.params[param_index] =
252							self.params[param_index] * 10 + ((byte - b'0') as usize);
253					}
254					None
255				}
256				table::Action::Print => {
257					let n_bytes = table::UTF8_CHAR_WIDTH[byte as usize] as usize;
258					let mut bytes = [0u8; 4];
259					bytes[0] = byte;
260					for i in 1..n_bytes {
261						bytes[i] = match self.bytes.next() {
262							None => return None,
263							Some(result) => match result {
264								Err(err) => return Some(Err(err.into())),
265								Ok(byte) => byte,
266							},
267						};
268					}
269					let c = std::str::from_utf8(&bytes[0..n_bytes])
270						.ok()?
271						.chars()
272						.next()?;
273					Some(Action::Print(c))
274				}
275				table::Action::Put => Some(Action::Put(byte)),
276				table::Action::Unhook => Some(Action::Unhook(byte)),
277			};
278
279			// return the parser action if any, otherwise loop again
280			if let Some(result) = action {
281				return Some(Ok(result));
282			} else {
283				continue;
284			}
285		}
286	}
287}
288
#[cfg(test)]
mod test {

	use super::{Action, ActionIter};

	/// Asserts that parsing `input` yields exactly the `expected` actions,
	/// in order, and that the iterator ends afterwards.
	fn assert_actions(input: &[u8], expected: Vec<Action>) {
		let mut action_iter = ActionIter::new(input);
		for action in expected.into_iter() {
			assert_eq!(action_iter.next(), Some(Ok(action)));
		}
		assert!(action_iter.next().is_none());
	}

	#[test]
	fn test_ascii() {
		assert_actions(b"hi", vec![Action::Print('h'), Action::Print('i')]);
	}

	#[test]
	fn test_utf8() {
		// mixes 1-byte, 4-byte (U+1F600) and 2-byte characters
		assert_actions(
			"x\u{1F600}yß".as_bytes(),
			vec![
				Action::Print('x'),
				Action::Print('\u{1F600}'),
				Action::Print('y'),
				Action::Print('ß'),
			],
		);
	}

	#[test]
	fn test_csi() {
		assert_actions(
			"\x1b[m\x1b[30mx\x1b[12;14H\u{1F600}".as_bytes(),
			vec![
				Action::Csi {
					intermediates: [0, 0],
					intermediates_count: 0,
					params: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
					params_count: 0,
					byte: b'm',
				},
				Action::Csi {
					intermediates: [0, 0],
					intermediates_count: 0,
					params: [30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
					params_count: 1,
					byte: b'm',
				},
				Action::Print('x'),
				Action::Csi {
					intermediates: [0, 0],
					intermediates_count: 0,
					params: [12, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
					params_count: 2,
					byte: b'H',
				},
				Action::Print('\u{1F600}'),
			],
		);
	}

	#[test]
	fn test_osc() {
		assert_actions(
			b"\x1b]hi\x9c",
			vec![
				Action::OscStart(),
				Action::OscPut(b'h'),
				Action::OscPut(b'i'),
				Action::OscEnd(),
			],
		);
	}

	#[test]
	fn test_multiple_table_actions_per_byte() {
		assert_actions(
			"\x1b\x50\x3f\x40\x1b\x5b\x39\x40🐶".as_bytes(),
			vec![
				Action::Hook {
					intermediates: [0x3f, 0],
					intermediates_count: 1,
					params: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
					params_count: 0,
				},
				Action::Unhook(0x1b),
				Action::Csi {
					intermediates: [0, 0],
					intermediates_count: 0,
					params: [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
					params_count: 1,
					byte: 0x40,
				},
				Action::Print('🐶'),
			],
		);
	}
}