1#![doc = include_str!("../README.md")]
2
3use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
4use std::io::{self, Cursor, Write};
5use thiserror::Error;
6
7mod crc;
8mod dictionary;
9
10use dictionary::{DictionaryReference, TokenDictionary};
11
/// Errors reported while encoding or decoding compressed RTF.
#[derive(Error, Debug)]
pub enum Error {
    /// An underlying read or write failed.
    #[error("IO error: {0:?}")]
    IoError(#[from] io::Error),
    /// The header's COMPSIZE field plus the size of that field does not equal
    /// the input length; carries the COMPSIZE value that was read.
    #[error("COMPSIZE mismatch: {0}")]
    CompressedSizeMismatch(u32),
    /// The CRC computed over the payload differs from the header's CRC field;
    /// carries the CRC value read from the header.
    #[error("COMPRESSED CRC mismatch: 0x{0:08X}")]
    CompressedCrcMismatch(u32),
    /// The header's COMPTYPE field is neither `COMPRESSED` nor `UNCOMPRESSED`;
    /// carries the value that was read.
    #[error("Invalid COMPTYPE: 0x{0:08X}")]
    InvalidCompressionType(u32),
    /// An error raised by the token dictionary.
    #[error("Dictionary reference error: {0:?}")]
    DictionaryError(#[from] dictionary::Error),
    /// The RTF text contains a UTF-16 code unit that does not fit in one byte.
    #[error("Invalid ASCII RTF content")]
    InvalidAsciiRtf,
    /// The encoded output is longer than `u32::MAX - 12` bytes; carries its length.
    #[error("COMPRESSED RTF too large: {0}")]
    CompressedRtfTooLarge(usize),
    /// The input text is longer than `u32::MAX - 12` bytes; carries its length.
    #[error("UNCOMPRESSED RTF too large: {0}")]
    UncompressedRtfTooLarge(usize),
}
31
/// Shorthand for a [`std::result::Result`] whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
33
/// COMPTYPE value marking a compressed payload; stored little-endian it is the
/// byte sequence `4C 5A 46 75` (ASCII "LZFu").
const COMPRESSED: u32 = 0x75465A4C;
/// COMPTYPE value marking a payload stored verbatim; stored little-endian it is
/// the byte sequence `4D 45 4C 41` (ASCII "MELA").
const UNCOMPRESSED: u32 = 0x414C454D;
36
37pub fn decompress_rtf(data: &[u8]) -> Result<String> {
38 let total_size = data.len();
39 let mut cursor = Cursor::new(&data[..16]);
40 let compressed_size = cursor.read_u32::<LittleEndian>()?;
41
42 if compressed_size as usize + size_of_val(&compressed_size) != total_size {
43 return Err(Error::CompressedSizeMismatch(compressed_size));
44 }
45
46 let raw_size = cursor.read_u32::<LittleEndian>()?;
47 let compression_type = cursor.read_u32::<LittleEndian>()?;
48 let crc = cursor.read_u32::<LittleEndian>()?;
49
50 match compression_type {
51 COMPRESSED => {
52 let compressed_crc = crc::calculate_crc(0, &data[16..]);
53 if crc != compressed_crc {
54 return Err(Error::CompressedCrcMismatch(crc));
55 }
56
57 let mut dictionary = TokenDictionary::default();
58 let mut output = Vec::with_capacity(raw_size as usize);
59
60 let mut cursor = Cursor::new(&data[16..]);
61 'decompress: while let Ok(control) = cursor.read_u8() {
62 for i in 0..8 {
63 let bit = control & (0x01 << i);
64 if bit == 0 {
65 let Ok(byte) = cursor.read_u8() else {
66 break 'decompress;
67 };
68 output.push(byte);
69 dictionary.write_byte(byte);
70 } else {
71 let reference = DictionaryReference::read(&mut cursor)?;
72 let Some(mut reference) = dictionary.read_reference(reference) else {
73 break 'decompress;
74 };
75 output.append(&mut reference);
76 }
77 }
78 }
79
80 Ok(string_from_ascii(&output))
81 }
82 UNCOMPRESSED => Ok(string_from_ascii(&data[16..raw_size as usize + 16])),
83 invalid => Err(Error::InvalidCompressionType(invalid)),
84 }
85}
86
/// Builds a `String` from single-byte text.
///
/// Only the bytes before the first NUL (or all of `data` when there is none)
/// are used; each byte becomes the character with the same code point value.
fn string_from_ascii(data: &[u8]) -> String {
    let text_len = data
        .iter()
        .position(|&byte| byte == 0)
        .unwrap_or(data.len());
    data[..text_len]
        .iter()
        .map(|&byte| char::from(byte))
        .collect()
}
96
97fn convert_to_ascii(rtf: &str) -> Result<Vec<u8>> {
98 rtf.encode_utf16()
99 .map(|ch| u8::try_from(ch).map_err(|_| Error::InvalidAsciiRtf))
100 .collect()
101}
102
103pub fn compress_rtf(rtf: &str) -> Result<Vec<u8>> {
104 let data = convert_to_ascii(rtf)?;
105 if data.len() > u32::MAX as usize - 12 {
106 return Err(Error::UncompressedRtfTooLarge(data.len()));
107 }
108
109 let mut output = Cursor::new(Vec::with_capacity(data.len() + 16));
110 output.write_all(&[0_u8; 16])?;
111
112 let mut read_offset = 0;
113 let mut dictionary = TokenDictionary::default();
114 let mut control = 0;
115 let mut run_buffer = [0_u8; 16];
116 let mut run_length = 0;
117
118 'runs: while read_offset <= data.len() {
119 let mut cursor = Cursor::new(run_buffer.as_mut_slice());
120
121 control = 0;
122 run_length = 0;
123
124 for i in 0..8 {
125 if read_offset >= data.len() {
126 dictionary.final_reference().write(&mut cursor)?;
127 control |= 0x01 << i;
128 run_length += 2;
129 break 'runs;
130 }
131
132 match dictionary.find_longest_match(&data[read_offset..])? {
133 Some(best_match) => {
134 best_match.write(&mut cursor)?;
135 let best_match_length = best_match.length() as usize;
136 read_offset += best_match_length;
137 control |= 0x01 << i;
138 run_length += 2;
139 }
140 None => {
141 let byte = data[read_offset];
142 cursor.write_u8(byte)?;
143 read_offset += 1;
144 run_length += 1;
145 }
146 }
147 }
148
149 output.write_u8(control)?;
150 output.write_all(&run_buffer[..run_length])?;
151 run_length = 0;
152 }
153
154 if run_length > 0 {
155 output.write_u8(control)?;
156 output.write_all(&run_buffer[..run_length])?;
157 }
158
159 let mut output = output.into_inner();
160 if output.len() > u32::MAX as usize - 12 {
161 return Err(Error::CompressedRtfTooLarge(output.len()));
162 }
163 let compressed_size = output.len() as u32;
164 let compressed_size = compressed_size - size_of_val(&compressed_size) as u32;
165 let raw_size = data.len() as u32;
166 let compression_type = COMPRESSED;
167 let crc = crc::calculate_crc(0, &output[16..]);
168
169 let mut header = Cursor::new(&mut output[..16]);
170 header.write_u32::<LittleEndian>(compressed_size)?;
171 header.write_u32::<LittleEndian>(raw_size)?;
172 header.write_u32::<LittleEndian>(compression_type)?;
173 header.write_u32::<LittleEndian>(crc)?;
174
175 Ok(output)
176}
177
178pub fn encode_rtf(rtf: &str) -> Result<Vec<u8>> {
179 let data = convert_to_ascii(rtf)?;
180 if data.len() > u32::MAX as usize - 12 {
181 return Err(Error::UncompressedRtfTooLarge(data.len()));
182 }
183 let raw_size = data.len() as u32;
184 let compressed_size = raw_size + 12;
185 let compression_type = UNCOMPRESSED;
186 let crc = 0;
187
188 let mut cursor = Cursor::new(Vec::with_capacity(raw_size as usize + 16));
189 cursor.write_u32::<LittleEndian>(compressed_size)?;
190 cursor.write_u32::<LittleEndian>(raw_size)?;
191 cursor.write_u32::<LittleEndian>(compression_type)?;
192 cursor.write_u32::<LittleEndian>(crc)?;
193 cursor.write_all(&data)?;
194
195 Ok(cursor.into_inner())
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    // Known-good container for `UNCOMPRESSED_SIMPLE_RTF`.
    const COMPRESSED_SIMPLE_RTF: &[u8] = &[
        0x2d, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x4c, 0x5a, 0x46, 0x75, 0xf1, 0xc5, 0xc7,
        0xa7, 0x03, 0x00, 0x0a, 0x00, 0x72, 0x63, 0x70, 0x67, 0x31, 0x32, 0x35, 0x42, 0x32, 0x0a,
        0xf3, 0x20, 0x68, 0x65, 0x6c, 0x09, 0x00, 0x20, 0x62, 0x77, 0x05, 0xb0, 0x6c, 0x64, 0x7d,
        0x0a, 0x80, 0x0f, 0xa0,
    ];

    const UNCOMPRESSED_SIMPLE_RTF: &str = "{\\rtf1\\ansi\\ansicpg1252\\pard hello world}\r\n";

    #[test]
    fn test_decompress_simple_rtf() {
        let rtf = decompress_rtf(COMPRESSED_SIMPLE_RTF).unwrap();
        assert_eq!(rtf, UNCOMPRESSED_SIMPLE_RTF);
    }

    #[test]
    fn test_compress_simple_rtf() {
        let compressed = compress_rtf(UNCOMPRESSED_SIMPLE_RTF).unwrap();
        assert_eq!(compressed.as_slice(), COMPRESSED_SIMPLE_RTF);
    }

    // Known-good container whose text repeats a short pattern many times.
    const COMPRESSED_CROSSING_WRITE_RTF: &[u8] = &[
        0x1a, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x4c, 0x5a, 0x46, 0x75, 0xe2, 0xd4, 0x4b,
        0x51, 0x41, 0x00, 0x04, 0x20, 0x57, 0x58, 0x59, 0x5a, 0x0d, 0x6e, 0x7d, 0x01, 0x0e, 0xb0,
    ];

    const UNCOMPRESSED_CROSSING_WRITE_RTF: &str = "{\\rtf1 WXYZWXYZWXYZWXYZWXYZ}";

    #[test]
    fn test_decompress_crossing_write_rtf() {
        let rtf = decompress_rtf(COMPRESSED_CROSSING_WRITE_RTF).unwrap();
        assert_eq!(rtf, UNCOMPRESSED_CROSSING_WRITE_RTF);
    }

    #[test]
    fn test_compress_crossing_write_rtf() {
        let compressed = compress_rtf(UNCOMPRESSED_CROSSING_WRITE_RTF).unwrap();
        assert_eq!(compressed.as_slice(), COMPRESSED_CROSSING_WRITE_RTF);
    }

    #[test]
    fn test_encode_round_trip() {
        let encoded = encode_rtf(UNCOMPRESSED_SIMPLE_RTF).unwrap();
        // 16-byte header followed by the text verbatim.
        assert_eq!(encoded.len(), UNCOMPRESSED_SIMPLE_RTF.len() + 16);
        assert_eq!(&encoded[16..], UNCOMPRESSED_SIMPLE_RTF.as_bytes());

        let rtf = decompress_rtf(&encoded).unwrap();
        assert_eq!(rtf, UNCOMPRESSED_SIMPLE_RTF);
    }

    #[test]
    fn test_reject_non_single_byte_text() {
        assert!(matches!(
            compress_rtf("\u{100}"),
            Err(Error::InvalidAsciiRtf)
        ));
        assert!(matches!(encode_rtf("\u{100}"), Err(Error::InvalidAsciiRtf)));
    }

    #[test]
    fn test_decompress_size_mismatch() {
        // Dropping the last byte makes COMPSIZE disagree with the input length.
        let truncated = &COMPRESSED_SIMPLE_RTF[..COMPRESSED_SIMPLE_RTF.len() - 1];
        assert!(matches!(
            decompress_rtf(truncated),
            Err(Error::CompressedSizeMismatch(0x2d))
        ));
    }

    #[test]
    fn test_decompress_crc_mismatch() {
        let mut corrupted = COMPRESSED_SIMPLE_RTF.to_vec();
        let last = corrupted.len() - 1;
        corrupted[last] ^= 0xff;
        assert!(matches!(
            decompress_rtf(&corrupted),
            Err(Error::CompressedCrcMismatch(_))
        ));
    }

    #[test]
    fn test_decompress_invalid_compression_type() {
        // Header only: COMPSIZE = 12, RAWSIZE = 0, unknown COMPTYPE, CRC = 0.
        let mut data = Vec::new();
        for field in [12_u32, 0, 0xDEAD_BEEF, 0] {
            data.extend_from_slice(&field.to_le_bytes());
        }
        assert!(matches!(
            decompress_rtf(&data),
            Err(Error::InvalidCompressionType(0xDEAD_BEEF))
        ));
    }
}