sixel_tokenizer/
lib.rs

//! This is a tokenizer for serialized Sixel images. For more information on the protocol, see <https://vt100.net/docs/vt3xx-gp/chapter14.html>.
2//!
3//! [`Parser`] should be given sixel bytes one by one and a callback. The callback is called with a
4//! [`SixelEvent`] zero or more times per byte when relevant.
5//!
6//! # Example
7//! ```rust
8//! use sixel_tokenizer::Parser;
9//! 
10//! fn main() {
11//!     let sample = "
12//!         \u{1b}Pq
13//!         \"2;1;100;200
14//!         #0;2;0;0;0#1;2;100;100;0#2;2;0;100;0
15//!         #1~~@@vv@@~~@@~~$
16//!         #2??}}GG}}??}}??-
17//!         #1!14@
18//!         \u{1b}\\
19//!     ";
20//!     let sample_bytes = sample.as_bytes();
21//!     let mut events = vec![];
22//!     let mut parser = Parser::new();
23//!     for byte in sample_bytes {
24//!         parser.advance(&byte, |sixel_event| events.push(sixel_event));
25//!     }
26//!     let mut snapshot = String::new();
27//!     for event in events {
28//!         snapshot.push_str(&format!("{:?}", event));
29//!         snapshot.push('\n');
30//!     }
31//!     println!("{}", snapshot);
32//! }
33//! ```
34//!
35//! This program will print:
36//! ```text
37//! Dcs { macro_parameter: None, transparent_background: None, horizontal_pixel_distance: None }
38//! RasterAttribute { pan: 2, pad: 1, ph: Some(100), pv: Some(200) }
39//! ColorIntroducer { color_number: 0, color_coordinate_system: Some(RGB(0, 0, 0)) }
40//! ColorIntroducer { color_number: 1, color_coordinate_system: Some(RGB(100, 100, 0)) }
41//! ColorIntroducer { color_number: 2, color_coordinate_system: Some(RGB(0, 100, 0)) }
42//! ColorIntroducer { color_number: 1, color_coordinate_system: None }
43//! Data { byte: 126 }
44//! ...
45//! GotoNextLine
46//! ColorIntroducer { color_number: 1, color_coordinate_system: None }
47//! Repeat { repeat_count: 14, byte_to_repeat: 64 }
48//! End
49//! ```
50
51use std::num::ParseIntError;
52use std::str::Utf8Error;
53
54use arrayvec::{ArrayVec, CapacityError};
55use thiserror::Error;
56
57mod sixel_event;
58pub use sixel_event::SixelEvent;
59pub use sixel_event::ColorCoordinateSystem;
60
61#[derive(Error, Debug)]
62pub enum ParserError {
63    #[error("Failed to parse")]
64    ParsingError,
65    #[error("Failed to parse")]
66    CapacityError(#[from] CapacityError<ArrayVec<u8, 5>>),
67    #[error("Failed to parse")]
68    CapacityErrorU8(#[from] CapacityError<u8>),
69    #[error("Failed to parse")]
70    Utf8Error(#[from] Utf8Error),
71    #[error("Failed to parse")]
72    ParseIntError(#[from] ParseIntError),
73}
74
/// The context in which the parser interprets the next incoming byte.
#[derive(Debug, Clone, Copy)]
pub enum ParserState {
    /// Between instructions: the state entered after a data byte, `$`, `-`,
    /// the `q` that closes the DCS header, or the `ESC \` terminator.
    Ground,
    /// Inside the Device Control String header, entered after `ESC P`.
    DeviceControlString,
    /// Directly after an `ESC` (byte 27).
    EscapeCharacter,
    /// Inside a colour instruction, entered after `#`.
    ColorIntroducer,
    /// Inside a raster attribute instruction, entered after `"`.
    RasterAttribute,
    /// Inside a repeat instruction, entered after `!`.
    GraphicsRepeatIntroducer,
    /// Inside input that could not be interpreted.
    UnknownSequence,
}
85
86#[derive(Clone, Debug)]
87pub struct Parser {
88    state: ParserState,
89    raw_instruction: ArrayVec<u8, 256>,
90    pending_event_fields: ArrayVec<ArrayVec<u8, 5>, 5>,
91    currently_parsing: ArrayVec<u8, 256>,
92}
93
94impl Parser {
95    pub fn new() -> Self {
96        Parser {
97            state: ParserState::Ground,
98            raw_instruction: ArrayVec::new(),
99            pending_event_fields: ArrayVec::new(),
100            currently_parsing: ArrayVec::new(),
101        }
102    }
103    pub fn advance(&mut self, byte: &u8, mut cb: impl FnMut(SixelEvent)) {
104        if byte == &b' ' || byte == &b'\n' || byte == &b'\t' {
105            // ignore whitespace
106            return;
107        }
108        if let Err(e) = self.process_byte(*byte, &mut cb) {
109            self.handle_error(e, Some(*byte), &mut cb);
110        }
111    }
112    fn process_byte(
113        &mut self,
114        byte: u8,
115        mut cb: impl FnMut(SixelEvent),
116    ) -> Result<(), ParserError> {
117        match (self.state, byte) {
118            (ParserState::EscapeCharacter, b'P') => self.raw_instruction.try_push(byte)?,
119            (ParserState::EscapeCharacter, b'\\') => self.emit_end_sequence(&mut cb)?,
120            (ParserState::DeviceControlString, b'q') => self.emit_dcs_event(&mut cb)?,
121            (ParserState::GraphicsRepeatIntroducer, b'?'..=b'~') => {
122                self.emit_repeat_introducer_event(byte, &mut cb)?
123            }
124            (_, b'?'..=b'~' | b'$' | b'-') => {
125                self.emit_possible_pending_event(&mut cb);
126                self.emit_single_byte_event(byte, &mut cb)?;
127            }
128            (_, b';') => {
129                self.raw_instruction.try_push(byte)?;
130                self.finalize_field()?;
131            }
132            (_, b'0'..=b'9') => {
133                self.raw_instruction.try_push(byte)?;
134                self.currently_parsing.try_push(byte)?;
135            }
136            _ => {
137                self.emit_possible_pending_event(&mut cb);
138                self.raw_instruction.try_push(byte)?;
139            }
140        };
141        self.move_to_next_state(byte);
142        Ok(())
143    }
144    fn move_to_next_state(&mut self, byte: u8) {
145        self.state = match (self.state, byte) {
146            (ParserState::EscapeCharacter, b'P') => ParserState::DeviceControlString,
147            (ParserState::EscapeCharacter, b'\\')
148            | (ParserState::DeviceControlString, b'q')
149            | (ParserState::GraphicsRepeatIntroducer, b'?'..=b'~') => ParserState::Ground,
150            (_, b'?'..=b'~' | b'$' | b'-') => ParserState::Ground,
151            (_, b'#') => ParserState::ColorIntroducer,
152            (_, b'"') => ParserState::RasterAttribute,
153            (_, b'!') => ParserState::GraphicsRepeatIntroducer,
154            (_, b';' | b'0'..=b'9') => self.state,
155            (_, 27) => ParserState::EscapeCharacter,
156            _ => ParserState::UnknownSequence,
157        };
158    }
159    fn handle_error(&mut self, err: ParserError, byte: Option<u8>, cb: impl FnMut(SixelEvent)) {
160        match err {
161            _ => {
162                self.state = ParserState::UnknownSequence;
163                self.pending_event_fields.clear();
164                self.currently_parsing.clear();
165                self.emit_unknown_sequences(cb, byte);
166            }
167        }
168    }
169    fn emit_dcs_event(&mut self, mut cb: impl FnMut(SixelEvent)) -> Result<(), ParserError> {
170        self.finalize_field()?;
171        let event = SixelEvent::dcs_from_fields(&mut self.pending_event_fields)?;
172        self.raw_instruction.clear();
173        cb(event);
174        Ok(())
175    }
176    fn emit_end_sequence(&mut self, mut cb: impl FnMut(SixelEvent)) -> Result<(), ParserError> {
177        self.finalize_field()?;
178        self.clear();
179        cb(SixelEvent::End);
180        Ok(())
181    }
182    fn emit_repeat_introducer_event(
183        &mut self,
184        byte: u8,
185        mut cb: impl FnMut(SixelEvent),
186    ) -> Result<(), ParserError> {
187        self.finalize_field()?;
188        let event = SixelEvent::repeat_from_fields(&mut self.pending_event_fields, byte)?;
189        self.raw_instruction.clear();
190        cb(event);
191        Ok(())
192    }
193    fn emit_possible_pending_event(&mut self, mut cb: impl FnMut(SixelEvent)) {
194        match self.possible_pending_event() {
195            Ok(Some(event)) => cb(event),
196            Ok(None) => {}
197            Err(e) => self.handle_error(e, None, &mut cb),
198        }
199    }
200    fn emit_single_byte_event(
201        &mut self,
202        byte: u8,
203        mut cb: impl FnMut(SixelEvent),
204    ) -> Result<(), ParserError> {
205        let event = match byte {
206            b'?'..=b'~' => self.sixel_data_event(byte),
207            b'$' => self.beginning_of_line_event(),
208            b'-' => self.next_line_event(),
209            _ => Err(ParserError::ParsingError),
210        };
211        cb(event?);
212        Ok(())
213    }
214    fn emit_unknown_sequences(&mut self, mut cb: impl FnMut(SixelEvent), last_byte: Option<u8>) {
215        loop {
216            let mut bytes: [Option<u8>; 5] = Default::default();
217            let unknown_sequence_elements = if self.raw_instruction.len() >= 5 {
218                self.raw_instruction.drain(..5).chain(None)
219            } else {
220                self.raw_instruction.drain(..).chain(last_byte)
221            };
222            for (i, byte) in unknown_sequence_elements.enumerate() {
223                bytes[i] = Some(byte);
224            }
225            cb(SixelEvent::UnknownSequence(bytes));
226            if self.raw_instruction.is_empty() {
227                break;
228            }
229        }
230    }
231    fn color_introducer_event(&mut self) -> Result<SixelEvent, ParserError> {
232        self.finalize_field()?;
233        let event = SixelEvent::color_introducer_from_fields(&mut self.pending_event_fields)?;
234        self.raw_instruction.clear();
235        Ok(event)
236    }
237    fn raster_attribute_event(&mut self) -> Result<SixelEvent, ParserError> {
238        self.finalize_field()?;
239        let event = SixelEvent::raster_attribute_from_fields(&mut self.pending_event_fields)?;
240        self.raw_instruction.clear();
241        Ok(event)
242    }
243    fn sixel_data_event(&mut self, byte: u8) -> Result<SixelEvent, ParserError> {
244        self.finalize_field()?;
245        self.raw_instruction.clear();
246        Ok(SixelEvent::Data { byte })
247    }
248    fn beginning_of_line_event(&mut self) -> Result<SixelEvent, ParserError> {
249        self.raw_instruction.clear();
250        Ok(SixelEvent::GotoBeginningOfLine)
251    }
252    fn next_line_event(&mut self) -> Result<SixelEvent, ParserError> {
253        self.raw_instruction.clear();
254        Ok(SixelEvent::GotoNextLine)
255    }
256    fn possible_pending_event(&mut self) -> Result<Option<SixelEvent>, ParserError> {
257        let has_pending_event = !self.currently_parsing.is_empty()
258            || !self.pending_event_fields.is_empty()
259            || !self.raw_instruction.is_empty();
260        if has_pending_event {
261            match self.state {
262                ParserState::ColorIntroducer => {
263                    let event = self.color_introducer_event()?;
264                    Ok(Some(event))
265                }
266                ParserState::RasterAttribute => {
267                    let event = self.raster_attribute_event()?;
268                    Ok(Some(event))
269                }
270                _ => Err(ParserError::ParsingError),
271            }
272        } else {
273            Ok(None)
274        }
275    }
276    fn finalize_field(&mut self) -> Result<(), ParserError> {
277        if !self.currently_parsing.is_empty() {
278            let mut field: ArrayVec<u8, 5> = Default::default();
279            for byte in self.currently_parsing.drain(..) {
280                // we don't use collect here because ArrayVec doesn't implement Try and so
281                // we wouldn't be able to propagate errors
282                field.try_push(byte)?;
283            }
284            self.pending_event_fields.try_push(field)?;
285        }
286        Ok(())
287    }
288    fn clear(&mut self) {
289        drop(std::mem::replace(self, Parser::new()));
290    }
291}
292
293#[cfg(test)]
294#[path = "./tests.rs"]
295mod tests;