1use std::collections::HashMap;
2use std::{fmt, mem};
3
4use serde::{Deserialize, Serialize};
5use wasm_bindgen::prelude::wasm_bindgen;
6
7use crate::document::RtfDocument;
8use crate::header::{CharacterSet, Color, ColorRef, ColorTable, Font, FontFamily, FontRef, FontTable, RtfHeader, StyleSheet};
9use crate::paragraph::{Alignment, Paragraph, SpaceBetweenLine};
10use crate::tokens::{ControlWord, Property, Token};
11
/// Expands to a reference to a `Token::ControlSymbol` holding the given `ControlWord` variant.
///
/// * `header_control_word!(Word)` pairs the control word with a wildcard (`_`) property,
///   so this form can only be used in pattern position.
/// * `header_control_word!(Word, Prop)` pairs it with exactly `Property::Prop`; this form
///   works both as a pattern and as an expression to compare against.
macro_rules! header_control_word {
    ($word:ident) => {
        &Token::ControlSymbol((ControlWord::$word, _))
    };
    ($word:ident, $property:ident) => {
        &Token::ControlSymbol((ControlWord::$word, Property::$property))
    };
}
21
/// A run of text together with the character and paragraph formatting applied to it.
///
/// `Parser::add_text_to_document` appends text to the last block of the document body
/// while both `painter` and `paragraph` compare equal, and pushes a new block otherwise.
#[derive(Debug, Default, PartialEq, Clone, Deserialize, Serialize)]
#[wasm_bindgen(getter_with_clone)]
pub struct StyleBlock {
    /// Character formatting of the run.
    pub painter: Painter,
    /// Paragraph formatting of the run.
    pub paragraph: Paragraph,
    /// The text content of the run.
    pub text: String,
}
29
/// Character-level formatting state, updated by the parser as it meets formatting control words.
#[derive(Debug, Clone, PartialEq, Hash, Deserialize, Serialize)]
#[wasm_bindgen]
pub struct Painter {
    /// Key into the header color table; set from the `ColorNumber` control word.
    pub color_ref: ColorRef,
    /// Key into the header font table; set from the `FontNumber` control word.
    pub font_ref: FontRef,
    /// Raw value of the `FontSize` control word (defaults to 12).
    pub font_size: u16,
    /// Set from the `Bold` control word.
    pub bold: bool,
    /// Set from the `Italic` control word.
    pub italic: bool,
    /// Set from the `Underline` control word, cleared by `UnderlineNone`.
    pub underline: bool,
    /// Set from the `Superscript` control word.
    pub superscript: bool,
    /// Set from the `Subscript` control word.
    pub subscript: bool,
    /// Set from the `Smallcaps` control word.
    pub smallcaps: bool,
    /// Set from the `Strikethrough` control word.
    pub strike: bool,
}
44
45impl Default for Painter {
46 fn default() -> Self {
47 Self {
48 color_ref: Default::default(),
49 font_ref: Default::default(),
50 font_size: 12,
51 bold: Default::default(),
52 italic: Default::default(),
53 underline: Default::default(),
54 superscript: Default::default(),
55 subscript: Default::default(),
56 smallcaps: Default::default(),
57 strike: Default::default(),
58 }
59 }
60}
61
/// Errors reported while turning a token stream into an `RtfDocument`.
#[derive(Debug, Clone)]
pub enum ParserError {
    /// A token was found where another one was required; the payload is the full message.
    InvalidToken(String),
    /// An ignorable-destination token was still present when the body was parsed.
    IgnorableDestinationParsingError,
    /// The formatting-state stack was empty when it was needed (unbalanced brackets).
    MalformedPainterStack,
    /// A font number in the font table carried a property that is not a usable value.
    InvalidFontIdentifier(Property),
    /// A color identifier carried an invalid property.
    InvalidColorIdentifier(Property),
    /// A token was expected but none was available.
    NoMoreToken,
    /// An `i32` value could not be cast; the payload names the target type.
    ValueCastError(String),
    /// The given value could not be interpreted as unicode.
    UnicodeParsingError(i32),
    /// A `Token::Empty` placeholder (an already consumed token) reached the body parser.
    ParseEmptyToken,
}
74
// Only the default `Error` methods are used; the message comes from the `Display` impl.
impl std::error::Error for ParserError {}
76
77impl fmt::Display for ParserError {
78 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
79 let _ = write!(f, "[RTF Parser] : ");
80 return match self {
81 ParserError::InvalidToken(msg) => write!(f, "{}", msg),
82 ParserError::IgnorableDestinationParsingError => write!(f, "No ignorable destination should be left"),
83 ParserError::MalformedPainterStack => write!(f, "Malformed painter stack : Unbalanced number of brackets"),
84 ParserError::InvalidFontIdentifier(property) => write!(f, "Invalid font identifier : {:?}", property),
85 ParserError::InvalidColorIdentifier(property) => write!(f, "Invalid color identifier : {:?}", property),
86 ParserError::NoMoreToken => write!(f, "No more token to parse"),
87 ParserError::ValueCastError(_type) => write!(f, "Unable to cast i32 to {_type}"),
88 ParserError::UnicodeParsingError(value) => write!(f, "Unable to parse {value} value to unicode"),
89 ParserError::ParseEmptyToken => write!(f, "Try to parse an empty token, this should never happen. If so, please open an issue in the github repository"),
90 };
91 }
92}
93
/// Formatting state attached to one RTF group.
///
/// `Parser::parse` keeps a stack of these: an opening bracket pushes a copy of the
/// current state and a closing bracket pops it.
#[derive(Debug, Clone, PartialEq, Hash)]
struct ParserState {
    /// Character formatting currently in effect.
    pub painter: Painter,
    /// Paragraph formatting currently in effect.
    pub paragraph: Paragraph,
    /// Value of the last `UnicodeIgnoreCount` control word (defaults to 1); bounds how many
    /// consecutive low-valued (<= 255) code units are dropped after a unicode character.
    pub unicode_ignore_count: i32,
}
101
102impl Default for ParserState {
103 fn default() -> Self {
104 Self {
105 painter: Default::default(),
106 paragraph: Default::default(),
107 unicode_ignore_count: 1,
108 }
109 }
110}
111
/// Turns a list of lexer tokens into an `RtfDocument`.
pub struct Parser<'a> {
    /// The tokens to parse; a consumed token is replaced in place by `Token::Empty`.
    tokens: Vec<Token<'a>>,
    /// One flag per token, set to `true` once the token at the same index has been consumed.
    parsed_item: Vec<bool>,
    /// Index of the next token to look at or consume during the header pass.
    cursor: usize,
}
117
118impl<'a> Parser<'a> {
119 pub fn new(tokens: Vec<Token<'a>>) -> Self {
120 return Self {
121 parsed_item: vec![false; tokens.len()],
122 tokens,
123 cursor: 0,
124 };
125 }
126
127 pub fn get_tokens(&self) -> Vec<&Token> {
128 return self.tokens.iter().filter(|t| *t != &Token::Empty).collect();
130 }
131
132 fn check_document_validity(&self) -> Result<(), ParserError> {
133 if let Some(token) = self.tokens.first() {
135 if token != &Token::OpeningBracket {
136 return Err(ParserError::InvalidToken(format!("Invalid first token : {:?} not a '{{'", token)));
137 }
138 } else {
139 return Err(ParserError::NoMoreToken);
140 }
141 if let Some(token) = self.tokens.last() {
142 if token != &Token::ClosingBracket {
143 return Err(ParserError::InvalidToken(format!("Invalid last token : {:?} not a '}}'", token)));
144 }
145 } else {
146 return Err(ParserError::NoMoreToken);
147 }
148 return Ok(());
149 }
150
151 pub fn parse(&mut self) -> Result<RtfDocument, ParserError> {
152 self.check_document_validity()?;
153 let mut document = RtfDocument::default(); document.header = self.parse_header()?;
156 let mut state_stack: Vec<ParserState> = vec![ParserState::default()];
158 let len = self.tokens.len();
160 let mut i = 0;
161
162 while i < len {
163 if self.parsed_item[i] {
164 i += 1;
166 continue;
167 }
168 let token = &self.tokens[i];
169
170 match token {
171 Token::OpeningBracket => {
172 if let Some(last_state) = state_stack.last() {
173 state_stack.push(last_state.clone()); } else {
175 state_stack.push(ParserState::default());
176 }
177 }
178 Token::ClosingBracket => {
179 let state = state_stack.pop();
180 if state.is_none() {
181 return Err(ParserError::MalformedPainterStack);
182 }
183 }
184 Token::ControlSymbol((control_word, property)) => {
185 let Some(current_state) = state_stack.last_mut() else {
186 return Err(ParserError::MalformedPainterStack);
187 };
188 let current_painter = &mut current_state.painter;
189 let paragraph = &mut current_state.paragraph;
190 #[rustfmt::skip] match control_word {
192 ControlWord::ColorNumber => current_painter.color_ref = property.get_value_as::<ColorRef>()?,
193 ControlWord::FontNumber => current_painter.font_ref = property.get_value_as::<FontRef>()?,
194 ControlWord::FontSize => current_painter.font_size = property.get_value_as::<u16>()?,
195 ControlWord::Bold => current_painter.bold = property.as_bool(),
196 ControlWord::Italic => current_painter.italic = property.as_bool(),
197 ControlWord::Underline => current_painter.underline = property.as_bool(),
198 ControlWord::UnderlineNone => current_painter.underline = false,
199 ControlWord::Superscript => current_painter.superscript = property.as_bool(),
200 ControlWord::Subscript => current_painter.subscript = property.as_bool(),
201 ControlWord::Smallcaps => current_painter.smallcaps = property.as_bool(),
202 ControlWord::Strikethrough => current_painter.strike = property.as_bool(),
203 ControlWord::Pard => *paragraph = Paragraph::default(), ControlWord::Plain => *current_painter = Painter::default(), ControlWord::ParDefTab => paragraph.tab_width = property.get_value(),
207 ControlWord::LeftAligned
208 | ControlWord::RightAligned
209 | ControlWord::Center
210 | ControlWord::Justify => paragraph.alignment = Alignment::from(control_word),
211 ControlWord::SpaceBefore => paragraph.spacing.before = property.get_value(),
212 ControlWord::SpaceAfter => paragraph.spacing.after = property.get_value(),
213 ControlWord::SpaceBetweenLine => paragraph.spacing.between_line = SpaceBetweenLine::from(property.get_value()),
214 ControlWord::SpaceLineMul => paragraph.spacing.line_multiplier = property.get_value(),
215 ControlWord::UnicodeIgnoreCount => current_state.unicode_ignore_count = property.get_value(),
216 ControlWord::Unicode => {
217 let mut unicodes = Vec::with_capacity(current_state.unicode_ignore_count as usize + 1); if let Ok(unicode) = property.get_unicode_value() {
219 unicodes.push(unicode);
220 }
221 while i + 1 < len {
223 if let Token::ControlSymbol((ControlWord::Unicode, property)) = &self.tokens[i + 1] {
225 if let Ok(unicode) = property.get_unicode_value() {
226 unicodes.push(unicode);
227 }
228 i += 1;
229 } else {
230 break;
231 }
232 }
233 if unicodes.len() > 0 {
234 let mut ignore_mask = vec![true; unicodes.len()];
236 let mut ignore_counter = 0;
237 for i in 1..unicodes.len() {
238 if unicodes[i] <= 255 && ignore_counter < current_state.unicode_ignore_count {
239 ignore_counter += 1;
240 ignore_mask[i] = false;
241 } else {
242 ignore_counter = 0;
243 }
244 }
245 let mut ignore_mask_iter = ignore_mask.iter();
246 unicodes.retain(|_| *ignore_mask_iter.next().unwrap());
247 let str = String::from_utf16(unicodes.as_slice()).unwrap();
249 Self::add_text_to_document(&str, &state_stack, &mut document)?;
250 }
251 }
252 _ => {}
254 };
255 }
256 Token::PlainText(text) => Self::add_text_to_document(*text, &state_stack, &mut document)?,
257 Token::CRLF => Self::add_text_to_document("\n", &state_stack, &mut document)?,
258 Token::IgnorableDestination => {
259 return Err(ParserError::IgnorableDestinationParsingError);
260 }
261 Token::Empty => return Err(ParserError::ParseEmptyToken),
262 };
263 i += 1;
264 }
265 return Ok(document);
266 }
267
268 fn add_text_to_document(text: &str, state_stack: &Vec<ParserState>, document: &mut RtfDocument) -> Result<(), ParserError> {
269 let Some(current_state) = state_stack.last() else {
270 return Err(ParserError::MalformedPainterStack);
271 };
272 let current_painter = ¤t_state.painter;
273 let paragraph = ¤t_state.paragraph;
274 let last_style_group = document.body.last_mut();
275 if let Some(group) = last_style_group {
277 if group.painter.eq(current_painter) && group.paragraph.eq(¶graph) {
278 group.text.push_str(text);
279 return Ok(());
280 }
281 }
282 document.body.push(StyleBlock {
284 painter: current_painter.clone(),
285 paragraph: paragraph.clone(),
286 text: String::from(text),
287 });
288 return Ok(());
289 }
290
291 fn get_token_at(&'a self, index: usize) -> Option<&'a Token<'a>> {
292 return self.tokens.get(index);
293 }
294
295 fn get_next_token(&'a self) -> Option<&'a Token<'a>> {
297 return self.get_token_at(self.cursor);
298 }
299
300 #[inline]
301 fn consume_token_at(&mut self, index: usize) -> Option<Token<'a>> {
302 if self.tokens.is_empty() || index >= self.tokens.len() {
303 return None;
304 }
305 self.cursor += 1;
307 self.parsed_item[index] = true;
308 return Some(mem::replace(&mut self.tokens[index], Token::Empty));
309 }
310
311 fn consume_next_token(&mut self) -> Option<Token<'a>> {
312 return self.consume_token_at(self.cursor);
313 }
314
315 fn _consume_tokens_until(&mut self, reference_token: &Token<'a>) -> Vec<Token<'a>> {
317 let mut ret = vec![];
318 let token_type_id = mem::discriminant(reference_token);
319 while let Some(token) = self.consume_next_token() {
320 let type_id = mem::discriminant(&token);
321 ret.push(token);
322 if type_id == token_type_id {
323 break;
324 }
325 }
326 return ret;
327 }
328
329 fn consume_tokens_until_matching_bracket(&mut self) -> Vec<Token<'a>> {
331 let mut ret = vec![];
332 let mut count = 0;
333 while let Some(token) = self.consume_next_token() {
334 match token {
335 Token::OpeningBracket => count += 1,
336 Token::ClosingBracket => count -= 1,
337 _ => {}
338 }
339 ret.push(token);
340 if count < 0 {
341 break;
342 }
343 }
344 return ret;
345 }
346
347 fn consume_group(&mut self) -> Vec<Token<'a>> {
349 self.consume_token_at(self.cursor); return self.consume_tokens_until_matching_bracket();
352 }
353
/// Scans the tokens from the start and builds the `RtfHeader`.
///
/// Font table, color table, stylesheet and ignorable-destination groups are consumed
/// (their tokens are replaced by `Token::Empty` and flagged in `parsed_item`). Every
/// other token is only inspected for a character set and left in place.
///
/// # Errors
/// Propagates the errors of `parse_font_table`, `parse_color_table` and `parse_stylesheet`.
fn parse_header(&mut self) -> Result<RtfHeader, ParserError> {
    self.cursor = 0;
    let mut header = RtfHeader::default();
    // Look at the token under the cursor together with the one after it; the loop ends
    // once fewer than two tokens remain from the cursor.
    while let (Some(token), Some(mut next_token)) = (self.get_token_at(self.cursor), self.get_token_at(self.cursor + 1)) {
        let mut i = 0;
        // Skip CRLF tokens so that a group opener is paired with the first non-CRLF token after it.
        while *next_token == Token::CRLF {
            if let Some(next_token_not_crlf) = self.get_token_at(self.cursor + 1 + i) {
                next_token = next_token_not_crlf;
                i += 1;
            } else {
                break;
            }
        }
        match (token, next_token) {
            // Ignorable destination: consume the whole group.
            (Token::OpeningBracket, Token::IgnorableDestination) => {
                let ignore_group_tokens = self.consume_group();
                Self::parse_ignore_groups(&ignore_group_tokens);
            }
            (Token::OpeningBracket, header_control_word!(FontTable, None)) => {
                let font_table_tokens = self.consume_group();
                header.font_table = Self::parse_font_table(&font_table_tokens)?;
            }
            (Token::OpeningBracket, header_control_word!(ColorTable, None)) => {
                let color_table_tokens = self.consume_group();
                header.color_table = Self::parse_color_table(&color_table_tokens)?;
            }
            (Token::OpeningBracket, header_control_word!(StyleSheet, None)) => {
                let stylesheet_tokens = self.consume_group();
                header.stylesheet = Self::parse_stylesheet(&stylesheet_tokens)?;
            }
            // Any other token: record a character set if it carries one, then move on
            // without consuming it.
            (token, _) => {
                if let Some(charset) = CharacterSet::from(token) {
                    header.character_set = charset;
                }
                self.cursor += 1;
            }
        }
    }
    return Ok(header);
}
398
399 fn parse_font_table(font_tables_tokens: &Vec<Token<'a>>) -> Result<FontTable, ParserError> {
400 let Some(font_table_first_token) = font_tables_tokens.get(0) else {
401 return Err(ParserError::NoMoreToken);
402 };
403 if font_table_first_token != header_control_word!(FontTable, None) {
404 return Err(ParserError::InvalidToken(format!("{:?} is not a FontTable token", font_table_first_token)));
405 }
406 let mut table = HashMap::new();
407 let mut current_key = 0;
408 let mut current_font = Font::default();
409 for token in font_tables_tokens.iter() {
410 match token {
411 Token::ControlSymbol((control_word, property)) => match control_word {
412 ControlWord::FontNumber => {
413 table.insert(current_key, current_font.clone());
415 if let Property::Value(key) = property {
416 current_key = *key as FontRef;
417 } else {
418 return Err(ParserError::InvalidFontIdentifier(*property));
419 }
420 }
421 ControlWord::Unknown(name) => {
422 if let Some(font_family) = FontFamily::from(name) {
423 current_font.font_family = font_family;
424 }
425 }
426 _ => {}
427 },
428 Token::PlainText(name) => {
429 current_font.name = name.trim_end_matches(';').to_string();
430 }
431 Token::ClosingBracket => {
432 table.insert(current_key, current_font.clone());
433 } _ => {}
435 }
436 }
437 return Ok(table);
438 }
439
440 fn parse_color_table(color_table_tokens: &Vec<Token<'a>>) -> Result<ColorTable, ParserError> {
441 let Some(color_table_first_token) = color_table_tokens.get(0) else {
442 return Err(ParserError::NoMoreToken);
443 };
444 if color_table_first_token != header_control_word!(ColorTable, None) {
445 return Err(ParserError::InvalidToken(format!("ParserError: {:?} is not a ColorTable token", color_table_first_token)));
446 }
447 let mut table = HashMap::new();
448 let mut current_key = 1;
449 let mut current_color = Color::default();
450 for token in color_table_tokens.iter() {
451 match token {
452 Token::ControlSymbol((control_word, property)) => match control_word {
453 ControlWord::ColorRed => current_color.red = property.get_value_as::<u8>()?,
454 ControlWord::ColorGreen => current_color.green = property.get_value_as::<u8>()?,
455 ControlWord::ColorBlue => {
456 current_color.blue = property.get_value_as::<u8>()?;
457 table.insert(current_key, current_color.clone());
458 current_key += 1;
459 }
460 _ => {}
461 },
462 _ => {}
463 }
464 }
465 return Ok(table);
466 }
467
468 fn parse_stylesheet(_stylesheet_tokens: &Vec<Token<'a>>) -> Result<StyleSheet, ParserError> {
469 return Ok(StyleSheet::from([]));
471 }
472
473 fn parse_ignore_groups(_tokens: &Vec<Token<'a>>) {
474 }
476}
477
// Unit tests driving the parser through the lexer on inline RTF snippets and test files.
#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::header::CharacterSet::*;
    use crate::header::FontFamily::*;
    use crate::header::RtfHeader;
    use crate::include_test_file;
    use crate::lexer::Lexer;

    /// A minimal document yields the expected header and three style blocks.
    #[test]
    fn parser_header() {
        let tokens = Lexer::scan(r#"{ \rtf1\ansi{\fonttbl\f0\fswiss Helvetica;}\f0\pard Voici du texte en {\b gras}.\par }"#).unwrap();
        let doc = Parser::new(tokens).parse().unwrap();
        assert_eq!(
            doc.header,
            RtfHeader {
                character_set: Ansi,
                font_table: FontTable::from([(
                    0,
                    Font {
                        name: "Helvetica".into(),
                        character_set: 0,
                        font_family: Swiss
                    }
                )]),
                ..RtfHeader::default()
            }
        );
        assert_eq!(
            doc.body,
            [
                StyleBlock {
                    painter: Painter::default(),
                    paragraph: Default::default(),
                    text: "Voici du texte en ".into(),
                },
                StyleBlock {
                    painter: Painter { bold: true, ..Painter::default() },
                    paragraph: Default::default(),
                    text: "gras".into(),
                },
                StyleBlock {
                    painter: Painter::default(),
                    paragraph: Default::default(),
                    text: ".".into(),
                },
            ]
        );
    }

    /// A multi-line document with font and color switches parses without error.
    #[test]
    fn parse_multiline_document() {
        let document = r"{\rtf1\ansi\deff0 {\fonttbl {\f0 Courier;}{\f1 ProFontWindows;}}
 {\colortbl;\red0\green0\blue0;\red255\green0\blue0;\red255\green255\blue0;}
 This line is font 0 which is courier\line
 \f1
 This line is font 1\line
 \f0
 This line is font 0 again\line
 This line has a \cf2 red \cf1 word\line
 \highlight3 while this line has a \cf2 red \cf1 word and is highlighted in yellow\highlight0\line
 Finally, back to the default color.\line
 }";
        let tokens = Lexer::scan(document).unwrap();
        let _doc = Parser::new(tokens).parse().unwrap();
    }

    /// The header of a complete test file is extracted: character set, fonts and colors.
    #[test]
    fn parse_entire_file_header() {
        let file_content = include_test_file!("test-file.rtf");
        let tokens = Lexer::scan(file_content).unwrap();
        let doc = Parser::new(tokens).parse().unwrap();
        assert_eq!(
            doc.header,
            RtfHeader {
                character_set: Ansi,
                font_table: FontTable::from([
                    (
                        0,
                        Font {
                            name: "Helvetica".into(),
                            character_set: 0,
                            font_family: Swiss,
                        }
                    ),
                    (
                        1,
                        Font {
                            name: "Helvetica-Bold".into(),
                            character_set: 0,
                            font_family: Swiss,
                        }
                    )
                ]),
                color_table: ColorTable::from([(1, Color { red: 255, green: 255, blue: 255 }),]),
                ..RtfHeader::default()
            }
        );
    }

    /// An ignorable destination group is consumed entirely and leaves the header untouched.
    #[test]
    fn parse_ignore_group() {
        let rtf = r"{\*\expandedcolortbl;;}";
        let tokens = Lexer::scan(rtf).unwrap();
        let mut parser = Parser::new(tokens);
        let document = parser.parse().unwrap();
        // Every token must have been consumed.
        assert_eq!(parser.get_tokens(), Vec::<&Token>::new());
        assert_eq!(document.header, RtfHeader::default());
    }

    /// Same as `parse_ignore_group`, with line breaks between the bracket and the destination.
    #[test]
    fn parse_ignore_group_with_crlf() {
        let rtf = r"{\
 \
 \*\expandedcolortbl;;}";
        let tokens = Lexer::scan(rtf).unwrap();
        let mut parser = Parser::new(tokens);
        let document = parser.parse().unwrap();
        // Every token must have been consumed.
        assert_eq!(parser.get_tokens(), Vec::<&Token>::new());
        assert_eq!(document.header, RtfHeader::default());
    }

    /// Whitespace and line breaks of a list are kept in the resulting text.
    #[test]
    fn parse_whitespaces() {
        let file_content = include_test_file!("list-item.rtf");
        let tokens = Lexer::scan(file_content).unwrap();
        let mut parser = Parser::new(tokens);
        let document = parser.parse().unwrap();
        assert_eq!(
            document.body,
            vec![StyleBlock {
                painter: Painter { font_size: 24, ..Painter::default() },
                paragraph: Default::default(),
                text: "\nEmpty start\n\nList test : \n - item 1\n - item 2\n - item 3\n - item 4".into(),
            },]
        );
    }

    /// Parsing a file containing an image must not panic; the result itself is not checked.
    #[test]
    fn parse_image_data() {
        let rtf_content = include_test_file!("file-with-image.rtf");
        let tokens = Lexer::scan(rtf_content).unwrap();
        let _document = Parser::new(tokens).parse();
    }

    /// Body text is split into the expected blocks after a full header.
    #[test]
    fn parse_header_and_body() {
        let rtf = r#"{\rtf1\ansi\ansicpg1252\cocoartf2639
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\froman\fcharset0 Times-Bold;\f1\froman\fcharset0 Times-Roman;\f2\froman\fcharset0 Times-Italic;
\f3\fswiss\fcharset0 Helvetica;}
{\colortbl;\red255\green255\blue255;\red0\green0\blue10;\red0\green0\blue1;\red191\green191\blue191;
}
\f0\b\fs21 \cf2 Lorem ipsum
\fs56 \
\pard\pardeftab709\sl288\slmult1\sa225\qj\partightenfactor0

\f1\b0\fs21 \cf0 \
\pard\pardeftab709\fi-432\ri-1\sb240\sa120\partightenfactor0
\ls1\ilvl0
\f0\b\fs36\cf2\plain Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc ac faucibus odio. \
\pard\pardeftab709\sl288\slmult1\sa225\qj\partightenfactor0
}"#;
        let tokens = Lexer::scan(rtf).unwrap();
        let document = Parser::new(tokens).parse().unwrap();
        assert_eq!(document.body[0].text, "Lorem ipsum");
        assert_eq!(document.body[1].text, "\n");
        assert_eq!(document.body[2].text, "\n");
        assert_eq!(document.body[3].text, "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc ac faucibus odio. \n");
    }

    /// Alignment control words set the alignment of their paragraph.
    #[test]
    fn parse_paragraph_aligment() {
        let rtf = r#"{\rtf1\ansi\deff0 {\fonttbl {\f0 Times;}}
 \fs34
 {\pard \qc \fs60 Annalium Romae\par}
 {\pard \qj
 Urbem Romam a principio reges habuere; libertatem et
 \par}
 {\pard \ql
 Non Cinnae, non Sullae longa dominatio; et Pompei Crassique potentia
 \par}"#;
        let tokens = Lexer::scan(rtf).unwrap();
        let document = Parser::new(tokens).parse().unwrap();
        assert_eq!(document.body[0].paragraph.alignment, Alignment::Center);
        assert_eq!(document.body[1].paragraph.alignment, Alignment::Justify);
        assert_eq!(document.body[2].paragraph.alignment, Alignment::LeftAligned);
    }

    /// A hexadecimal escape (`\'ea`) is decoded and merged into the surrounding text.
    #[test]
    fn should_parse_escaped_char() {
        let rtf = r"{\rtf1\ansi\deff0 {\fonttbl {\f0 Times;}}je suis une b\'eate}";
        let tokens = Lexer::scan(rtf).unwrap();
        let document = Parser::new(tokens).parse().unwrap();
        assert_eq!(document.body[0].text, "je suis une bête");
    }

    /// `\plain` resets the character formatting to its default.
    #[test]
    fn parse_plain_directive() {
        let rtf = r"{\rtf1{\fonttbl {\f0 Times;}}\f0\b\fs36\u\cf2\plain Plain text}";
        let tokens = Lexer::scan(rtf).unwrap();
        let document = Parser::new(tokens).parse().unwrap();
        assert_eq!(document.body[0].painter, Painter::default());
    }

    /// A color reference used in the body resolves to the matching color table entry.
    #[test]
    fn parse_color_table() {
        let rtf = r#"{\rtf1\ansi\ansicpg936\cocoartf2761
 \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;\f1\fnil\fcharset134 PingFangSC-Regular;}
 {\colortbl;\red255\green255\blue255;\red251\green2\blue7;\red114\green44\blue253;}
 {\*\expandedcolortbl;;\cssrgb\c100000\c14913\c0;\cssrgb\c52799\c30710\c99498;}
 \f0\fs24 \cf2 A
 \f1 \cf3 B}"#;
        let tokens = Lexer::scan(rtf).unwrap();
        let document = Parser::new(tokens).parse().unwrap();
        assert_eq!(document.header.color_table.get(&document.body[0].painter.color_ref).unwrap(), &Color { red: 251, green: 2, blue: 7 });
    }

    /// `\ul` switches underline on and `\ulnone` switches it off.
    #[test]
    fn parse_underline() {
        let rtf = r#"{\rtf1\ansi\ansicpg936\cocoartf2761
 \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
 {\colortbl;\red255\green255\blue255;}
 {\*\expandedcolortbl;;}
 \paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
 \pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0

 \f0\fs24 \cf0 \ul \ulc0 a\ulnone A}"#;
        let tokens = Lexer::scan(rtf).unwrap();
        let document = Parser::new(tokens).parse().unwrap();
        assert_eq!(&document.body[0].painter.underline, &true);
        assert_eq!(&document.body[1].painter.underline, &false);
    }

    /// Unicode control words are decoded; with `\uc0` nothing after them is skipped.
    #[test]
    fn parse_unicode() {
        let rtf = r#"{\rtf1\ansi\ansicpg936\cocoartf2761
 \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
 \f0\fs24 \cf0 \uc0\u21834 \u21834 }"#;
        let tokens = Lexer::scan(rtf).unwrap();
        let document = Parser::new(tokens).parse().unwrap();
        assert_eq!(&document.body[0].text, "啊 啊");
    }

    /// Two consecutive unicode values forming a surrogate pair decode to one character.
    #[test]
    fn parse_two_characters_compound_unicode() {
        let rtf = r#"{\rtf1\ansi
 \f0 a\u55357 \u56447 1 \u21834}"#;
        let tokens = Lexer::scan(rtf).unwrap();
        let document = Parser::new(tokens).parse().unwrap();
        assert_eq!(&document.body[0].text, "a👿1 啊");
    }

    /// Fallback characters following a unicode value are dropped according to `\uc`.
    #[test]
    fn parse_unicode_with_fallback() {
        let rtf = r#"{\rtf1\ansi
 {\f0 \u-10179\'5f\u-9089\'5f}
 {\f1 \uc2\u32767\'c2\'52}
 {\f2 \uc2\u26789\'97\'73}
 {\f3 b\'eate}
 {\f4 \uc0 b\'ea\'eate}
 }"#;
        let tokens = Lexer::scan(rtf).unwrap();
        let document = Parser::new(tokens).parse().unwrap();
        assert_eq!(&document.body[0].text, "👿");
        assert_eq!(&document.body[1].text, "翿");
        assert_eq!(&document.body[2].text, "梥");
        assert_eq!(&document.body[3].text, "bête");
        assert_eq!(&document.body[4].text, "bêête");
    }

    /// A body that begins with a group right after the header parses without error.
    #[test]
    fn body_starts_with_a_group() {
        let rtf = r"{\rtf1\ansi\deff0{\fonttbl {\f0\fnil\fcharset0 Calibri;}{\f1\fnil\fcharset2 Symbol;}}{\colortbl ;}{\pard \u21435 \sb70\par}}";
        let tokens = Lexer::scan(rtf).unwrap();
        let _document = Parser::new(tokens).parse().unwrap();
    }

    /// Three ways of writing the same formatting produce the same body.
    #[test]
    fn rtf_different_semantic() {
        let rtf1 = r"{\rtf1 \b bold \i Bold Italic \i0 Bold again}";
        let rtf2 = r"{\rtf1 \b bold {\i Bold Italic }Bold again}";
        let rtf3 = r"{\rtf1 \b bold \i Bold Italic \plain\b Bold again}";
        let doc1 = RtfDocument::try_from(rtf1).unwrap();
        let doc2 = RtfDocument::try_from(rtf2).unwrap();
        let doc3 = RtfDocument::try_from(rtf3).unwrap();
        assert_eq!(doc1.body, doc2.body);
        assert_eq!(doc3.body, doc2.body);
    }
}