1use crate::raw::{
10 control_bin_raw, control_symbol_raw, control_word_hexbyte_raw, control_word_raw, end_group_raw,
11 newline_raw, rtf_text_raw, start_group_raw,
12};
13use std;
14
15use nom::branch::alt;
16use nom::combinator::map;
17use nom::multi::many0;
18use nom::Finish;
19use nom::IResult;
20
21#[derive(Debug)]
22pub struct ParseError<I> {
23 inner: nom::error::Error<I>,
24}
25
26impl<I> std::convert::From<nom::error::Error<I>> for ParseError<I> {
27 fn from(error: nom::error::Error<I>) -> Self {
28 Self { inner: error }
29 }
30}
31
32impl<I> std::fmt::Display for ParseError<I>
33where
34 I: std::fmt::Debug,
35{
36 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
37 write!(f, "Parser Error: {:?}", self.inner)
38 }
39}
40
41impl<I> std::error::Error for ParseError<I>
42where
43 I: std::fmt::Debug + std::fmt::Display + 'static,
44{
45 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
46 Some(&self.inner)
47 }
48}
49
/// One lexical element of an RTF byte stream, as produced by [`read_token`].
#[derive(Eq, PartialEq)]
pub enum Token {
    /// A backslash followed by a single character, e.g. `\*` or `\~`.
    ControlSymbol(char),
    /// A backslash followed by a name and an optional signed numeric
    /// argument, e.g. `\par` (no argument) or `\b0` (argument `0`).
    ControlWord {
        /// The name, without the leading backslash.
        name: String,
        /// The numeric argument, if one was present.
        arg: Option<i32>,
    },
    /// The payload bytes of a `\binN` control; the length is implied by the
    /// vector and is not stored separately.
    ControlBin(Vec<u8>),
    /// A run of document text, kept as raw bytes (no character decoding).
    Text(Vec<u8>),
    /// An opening brace, `{`.
    StartGroup,
    /// A closing brace, `}`.
    EndGroup,
    /// A line break in the RTF source, holding the exact bytes that were
    /// read (for example `\r\n`).
    Newline(Vec<u8>),
}
67
68impl std::fmt::Debug for Token {
69 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
70 match self {
71 Token::ControlSymbol(c) => write!(f, "Token::ControlSymbol({c})"),
72 Token::ControlWord { name, arg } => write!(
73 f,
74 "Token::ControlWord({}{})",
75 name,
76 arg.map(|i| format!(":{i}")).unwrap_or_default()
77 ),
78 Token::ControlBin(data) => {
79 write!(f, "Token::ControlBin(")?;
80 for byte in data {
81 write!(f, " {byte:02x?}")?;
82 }
83 write!(f, ")")
84 }
85 Token::Text(data) => {
86 write!(f, "Token::Text(")?;
87 for byte in data {
88 write!(f, " {byte:02x?}")?;
89 }
90 write!(f, ")")
91 }
92 Token::StartGroup => write!(f, "Token::StartGroup"),
93 Token::EndGroup => write!(f, "Token::EndGroup"),
94 Token::Newline(data) => {
95 write!(f, "Token::Newline(")?;
96 for byte in data {
97 write!(f, " {byte:02x?}")?;
98 }
99 write!(f, ")")
100 }
101 }
102 }
103}
104
105impl Token {
106 pub fn to_rtf(&self) -> Vec<u8> {
107 match self {
108 Token::ControlSymbol(c) => format!("\\{c}").as_bytes().to_vec(),
109 Token::ControlWord { name, arg } => match arg {
110 Some(num) => format!("\\{name}{num}").as_bytes().to_vec(),
111 None => format!("\\{name}").as_bytes().to_vec(),
112 },
113 Token::ControlBin(data) => {
114 let mut rtf: Vec<u8> = Vec::with_capacity(12 + data.len());
115 rtf.extend_from_slice(format!("\\bin{} ", data.len()).as_bytes());
116 rtf.extend_from_slice(data);
117 rtf
118 }
119 Token::Text(data) => data.to_vec(),
120 Token::StartGroup => b"{".to_vec(),
121 Token::EndGroup => b"}".to_vec(),
122 Token::Newline(data) => data.to_vec(),
123 }
124 }
125
126 pub fn token_delimiter_after(&self, next_token: &Token) -> &'static str {
133 if let Token::ControlWord { .. } = self {
134 if let Token::Text(_) = next_token {
137 return " ";
138 }
139 }
140 ""
141 }
142
143 pub fn token_delimiter_before(&self, prev_token: &Token) -> &'static str {
150 prev_token.token_delimiter_after(self)
151 }
152
153 pub fn get_name(&self) -> Option<String> {
154 if let Token::ControlWord { ref name, .. } = self {
155 Some(name.clone())
156 } else {
157 None
158 }
159 }
160
161 pub fn get_arg(&self) -> Option<i32> {
162 if let Token::ControlWord { ref arg, .. } = self {
163 *arg
164 } else {
165 None
166 }
167 }
168
169 pub fn get_symbol(&self) -> Option<char> {
170 if let Token::ControlSymbol(c) = self {
171 Some(*c)
172 } else {
173 None
174 }
175 }
176
177 pub fn get_bin(&self) -> Option<&[u8]> {
178 if let Token::ControlBin(data) = self {
179 Some(data.as_slice())
180 } else {
181 None
182 }
183 }
184
185 pub fn get_text(&self) -> Option<&[u8]> {
186 if let Token::Text(data) = self {
187 Some(data.as_slice())
188 } else {
189 None
190 }
191 }
192}
193
194pub fn read_token(input: &[u8]) -> IResult<&[u8], Token> {
203 alt((
204 read_control_hexbyte,
205 read_control_symbol,
206 read_control_bin,
207 read_control_word,
208 read_start_group,
209 read_end_group,
210 read_newline,
211 read_rtf_text,
212 ))(input)
213}
214
215pub fn read_control_hexbyte(input: &[u8]) -> IResult<&[u8], Token> {
216 map(control_word_hexbyte_raw, |(name, arg)| Token::ControlWord {
217 name: String::from(name),
218 arg,
219 })(input)
220}
221
222pub fn read_control_symbol(input: &[u8]) -> IResult<&[u8], Token> {
223 map(control_symbol_raw, Token::ControlSymbol)(input)
224}
225
226pub fn read_control_word(input: &[u8]) -> IResult<&[u8], Token> {
227 map(control_word_raw, |(name, arg)| Token::ControlWord {
228 name: String::from(name),
229 arg,
230 })(input)
231}
232
233pub fn read_control_bin(input: &[u8]) -> IResult<&[u8], Token> {
234 map(control_bin_raw, |bytes| Token::ControlBin(bytes.to_vec()))(input)
235}
236
237pub fn read_newline(input: &[u8]) -> IResult<&[u8], Token> {
238 map(newline_raw, |bytes| Token::Newline(bytes.to_vec()))(input)
239}
240
241pub fn read_start_group(input: &[u8]) -> IResult<&[u8], Token> {
242 map(start_group_raw, |_| Token::StartGroup)(input)
243}
244
245pub fn read_end_group(input: &[u8]) -> IResult<&[u8], Token> {
246 map(end_group_raw, |_| Token::EndGroup)(input)
247}
248
249pub fn read_rtf_text(input: &[u8]) -> IResult<&[u8], Token> {
250 map(rtf_text_raw, |text_bytes| Token::Text(text_bytes.to_vec()))(input)
251}
252
253pub fn read_token_stream(input: &[u8]) -> IResult<&[u8], Vec<Token>> {
254 many0(read_token)(input)
255}
256
257pub fn parse(bytes: &[u8]) -> IResult<&[u8], Vec<Token>> {
258 read_token_stream(bytes)
259}
260
261pub fn parse_finished(bytes: &[u8]) -> Result<Vec<Token>, ParseError<&[u8]>> {
262 parse(bytes)
263 .finish()
264 .map(|(_, o)| o)
265 .map_err(ParseError::from)
266}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `Token::ControlWord` expected for `\name` with an optional argument.
    fn word(name: &str, arg: Option<i32>) -> Token {
        Token::ControlWord {
            name: name.to_string(),
            arg,
        }
    }

    /// Builds the `Token::ControlBin` expected for the given payload.
    fn bin(payload: &[u8]) -> Token {
        Token::ControlBin(payload.to_vec())
    }

    /// Asserts that tokenizing `input` succeeds, leaves nothing unread and
    /// yields exactly `expected`.
    fn assert_token_stream(input: &[u8], expected: Vec<Token>) {
        let nothing_left: &[u8] = b"";
        assert_eq!(read_token_stream(input), Ok((nothing_left, expected)));
    }

    /// Asserts that a whole document tokenizes without error and without
    /// leaving any input unread.
    fn assert_fully_tokenized(document: &[u8]) {
        parse(document).unwrap();
        let (unparsed, _) = read_token_stream(document).unwrap();
        assert_eq!(
            unparsed.len(),
            0,
            "Unparsed data: {} bytes (first <=5 bytes: {:02X?})",
            unparsed.len(),
            &unparsed[..unparsed.len().min(5)]
        );
    }

    #[test]
    fn test_control_symbol_tokens() {
        let expected: Vec<Token> = "*.+~".chars().map(Token::ControlSymbol).collect();
        assert_token_stream(br#"\*\.\+\~"#, expected);
    }

    #[test]
    fn test_control_word_tokens() {
        assert_token_stream(
            br#"\par\b0\b\uncle\foo-5\applepi314159"#,
            vec![
                word("par", None),
                word("b", Some(0)),
                word("b", None),
                word("uncle", None),
                word("foo", Some(-5)),
                word("applepi", Some(314159)),
            ],
        );
    }

    #[test]
    fn test_control_bin_tokens() {
        assert_token_stream(
            b"\\bin5 ABC{}\\bin1 {\\bin0 \\bin0\\bin1 \\bin1\x01\\bin1 \x02",
            vec![
                bin(b"ABC{}"),
                bin(b"{"),
                bin(b""),
                bin(b""),
                bin(b" "),
                bin(b"\x01"),
                bin(b"\x02"),
            ],
        );
    }

    #[test]
    fn test_control() {
        assert_token_stream(
            b"\\*\\bin5 ABC{}\\b\\bin1 {\\bin0 \\b0\\bin0\\bin1 \\supercalifragilistic31415\\bin1\x01\\bin1 \x02",
            vec![
                Token::ControlSymbol('*'),
                bin(b"ABC{}"),
                word("b", None),
                bin(b"{"),
                bin(b""),
                word("b", Some(0)),
                bin(b""),
                bin(b" "),
                word("supercalifragilistic", Some(31415)),
                bin(b"\x01"),
                bin(b"\x02"),
            ],
        );
    }

    #[test]
    fn test_group_tokens() {
        assert_token_stream(
            b"\\b Hello World \\b0 \\par\r\nThis is a test {\\*\\nothing}\\\r",
            vec![
                word("b", None),
                Token::Text(b"Hello World ".to_vec()),
                word("b", Some(0)),
                word("par", None),
                Token::Newline(vec![0x0d, 0x0a]),
                Token::Text(b"This is a test ".to_vec()),
                Token::StartGroup,
                Token::ControlSymbol('*'),
                word("nothing", None),
                Token::EndGroup,
                // Backslash followed by a carriage return is a control symbol.
                Token::ControlSymbol(char::from(0x0d_u8)),
            ],
        );
    }

    #[test]
    fn test_sample_doc() {
        assert_fully_tokenized(include_bytes!("../tests/sample.rtf"));
    }

    #[test]
    fn test_spec_doc() {
        assert_fully_tokenized(include_bytes!("../tests/RTF-Spec-1.7.rtf"));
    }
}