Skip to main content

compressed_rtf/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
4use std::io::{self, Cursor, Write};
5use thiserror::Error;
6
7mod crc;
8mod dictionary;
9
10use dictionary::{DictionaryReference, TokenDictionary};
11
12#[derive(Error, Debug)]
13pub enum Error {
14    #[error("IO error: {0:?}")]
15    IoError(#[from] io::Error),
16    #[error("COMPSIZE mismatch: {0}")]
17    CompressedSizeMismatch(u32),
18    #[error("COMPRESSED CRC mismatch: 0x{0:08X}")]
19    CompressedCrcMismatch(u32),
20    #[error("Invalid COMPTYPE: 0x{0:08X}")]
21    InvalidCompressionType(u32),
22    #[error("Dictionary reference error: {0:?}")]
23    DictionaryError(#[from] dictionary::Error),
24    #[error("Invalid ASCII RTF content")]
25    InvalidAsciiRtf,
26    #[error("COMPRESSED RTF too large: {0}")]
27    CompressedRtfTooLarge(usize),
28    #[error("UNCOMPRESSED RTF too large: {0}")]
29    UncompressedRtfTooLarge(usize),
30}
31
32pub type Result<T> = std::result::Result<T, Error>;
33
/// COMPTYPE signature of a compressed stream: the ASCII tag `LZFu` read as a little-endian `u32`.
const COMPRESSED: u32 = u32::from_le_bytes(*b"LZFu");
/// COMPTYPE signature of an uncompressed stream: the ASCII tag `MELA` read as a little-endian `u32`.
const UNCOMPRESSED: u32 = u32::from_le_bytes(*b"MELA");
36
37pub fn decompress_rtf(data: &[u8]) -> Result<String> {
38    let total_size = data.len();
39    let mut cursor = Cursor::new(&data[..16]);
40    let compressed_size = cursor.read_u32::<LittleEndian>()?;
41
42    if compressed_size as usize + size_of_val(&compressed_size) != total_size {
43        return Err(Error::CompressedSizeMismatch(compressed_size));
44    }
45
46    let raw_size = cursor.read_u32::<LittleEndian>()?;
47    let compression_type = cursor.read_u32::<LittleEndian>()?;
48    let crc = cursor.read_u32::<LittleEndian>()?;
49
50    match compression_type {
51        COMPRESSED => {
52            let compressed_crc = crc::calculate_crc(0, &data[16..]);
53            if crc != compressed_crc {
54                return Err(Error::CompressedCrcMismatch(crc));
55            }
56
57            let mut dictionary = TokenDictionary::default();
58            let mut output = Vec::with_capacity(raw_size as usize);
59
60            let mut cursor = Cursor::new(&data[16..]);
61            'decompress: while let Ok(control) = cursor.read_u8() {
62                for i in 0..8 {
63                    let bit = control & (0x01 << i);
64                    if bit == 0 {
65                        let Ok(byte) = cursor.read_u8() else {
66                            break 'decompress;
67                        };
68                        output.push(byte);
69                        dictionary.write_byte(byte);
70                    } else {
71                        let reference = DictionaryReference::read(&mut cursor)?;
72                        let Some(mut reference) = dictionary.read_reference(reference) else {
73                            break 'decompress;
74                        };
75                        output.append(&mut reference);
76                    }
77                }
78            }
79
80            Ok(string_from_ascii(&output))
81        }
82        UNCOMPRESSED => Ok(string_from_ascii(&data[16..raw_size as usize + 16])),
83        invalid => Err(Error::InvalidCompressionType(invalid)),
84    }
85}
86
/// Builds a `String` from single-byte text, stopping at the first NUL byte.
///
/// Every byte becomes the character with the same code point, so bytes above `0x7F`
/// are kept (as U+0080..=U+00FF) rather than rejected.
fn string_from_ascii(data: &[u8]) -> String {
    data.iter()
        .take_while(|&&byte| byte != 0)
        .map(|&byte| char::from(byte))
        .collect()
}
96
97fn convert_to_ascii(rtf: &str) -> Result<Vec<u8>> {
98    rtf.encode_utf16()
99        .map(|ch| u8::try_from(ch).map_err(|_| Error::InvalidAsciiRtf))
100        .collect()
101}
102
103pub fn compress_rtf(rtf: &str) -> Result<Vec<u8>> {
104    let data = convert_to_ascii(rtf)?;
105    if data.len() > u32::MAX as usize - 12 {
106        return Err(Error::UncompressedRtfTooLarge(data.len()));
107    }
108
109    let mut output = Cursor::new(Vec::with_capacity(data.len() + 16));
110    output.write_all(&[0_u8; 16])?;
111
112    let mut read_offset = 0;
113    let mut dictionary = TokenDictionary::default();
114    let mut control = 0;
115    let mut run_buffer = [0_u8; 16];
116    let mut run_length = 0;
117
118    'runs: while read_offset <= data.len() {
119        let mut cursor = Cursor::new(run_buffer.as_mut_slice());
120
121        control = 0;
122        run_length = 0;
123
124        for i in 0..8 {
125            if read_offset >= data.len() {
126                dictionary.final_reference().write(&mut cursor)?;
127                control |= 0x01 << i;
128                run_length += 2;
129                break 'runs;
130            }
131
132            match dictionary.find_longest_match(&data[read_offset..])? {
133                Some(best_match) => {
134                    best_match.write(&mut cursor)?;
135                    let best_match_length = best_match.length() as usize;
136                    read_offset += best_match_length;
137                    control |= 0x01 << i;
138                    run_length += 2;
139                }
140                None => {
141                    let byte = data[read_offset];
142                    cursor.write_u8(byte)?;
143                    read_offset += 1;
144                    run_length += 1;
145                }
146            }
147        }
148
149        output.write_u8(control)?;
150        output.write_all(&run_buffer[..run_length])?;
151        run_length = 0;
152    }
153
154    if run_length > 0 {
155        output.write_u8(control)?;
156        output.write_all(&run_buffer[..run_length])?;
157    }
158
159    let mut output = output.into_inner();
160    if output.len() > u32::MAX as usize - 12 {
161        return Err(Error::CompressedRtfTooLarge(output.len()));
162    }
163    let compressed_size = output.len() as u32;
164    let compressed_size = compressed_size - size_of_val(&compressed_size) as u32;
165    let raw_size = data.len() as u32;
166    let compression_type = COMPRESSED;
167    let crc = crc::calculate_crc(0, &output[16..]);
168
169    let mut header = Cursor::new(&mut output[..16]);
170    header.write_u32::<LittleEndian>(compressed_size)?;
171    header.write_u32::<LittleEndian>(raw_size)?;
172    header.write_u32::<LittleEndian>(compression_type)?;
173    header.write_u32::<LittleEndian>(crc)?;
174
175    Ok(output)
176}
177
178pub fn encode_rtf(rtf: &str) -> Result<Vec<u8>> {
179    let data = convert_to_ascii(rtf)?;
180    if data.len() > u32::MAX as usize - 12 {
181        return Err(Error::UncompressedRtfTooLarge(data.len()));
182    }
183    let raw_size = data.len() as u32;
184    let compressed_size = raw_size + 12;
185    let compression_type = UNCOMPRESSED;
186    let crc = 0;
187
188    let mut cursor = Cursor::new(Vec::with_capacity(raw_size as usize + 16));
189    cursor.write_u32::<LittleEndian>(compressed_size)?;
190    cursor.write_u32::<LittleEndian>(raw_size)?;
191    cursor.write_u32::<LittleEndian>(compression_type)?;
192    cursor.write_u32::<LittleEndian>(crc)?;
193    cursor.write_all(&data)?;
194
195    Ok(cursor.into_inner())
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    const COMPRESSED_SIMPLE_RTF: &[u8] = &[
        0x2d, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x4c, 0x5a, 0x46, 0x75, 0xf1, 0xc5, 0xc7,
        0xa7, 0x03, 0x00, 0x0a, 0x00, 0x72, 0x63, 0x70, 0x67, 0x31, 0x32, 0x35, 0x42, 0x32, 0x0a,
        0xf3, 0x20, 0x68, 0x65, 0x6c, 0x09, 0x00, 0x20, 0x62, 0x77, 0x05, 0xb0, 0x6c, 0x64, 0x7d,
        0x0a, 0x80, 0x0f, 0xa0,
    ];

    const UNCOMPRESSED_SIMPLE_RTF: &str = "{\\rtf1\\ansi\\ansicpg1252\\pard hello world}\r\n";

    /// [Example 1: Simple Compressed RTF](https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxrtfcp/029bff74-8c00-402e-ac2b-0210a5f57371)
    #[test]
    fn test_decompress_simple_rtf() {
        let rtf = decompress_rtf(COMPRESSED_SIMPLE_RTF).unwrap();
        assert_eq!(rtf, UNCOMPRESSED_SIMPLE_RTF);
    }

    /// [Example 1: Simple RTF](https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxrtfcp/ba662823-d47a-4db3-ad45-a368a82acc90)
    #[test]
    fn test_compress_simple_rtf() {
        let compressed = compress_rtf(UNCOMPRESSED_SIMPLE_RTF).unwrap();
        assert_eq!(compressed, COMPRESSED_SIMPLE_RTF);
    }

    const COMPRESSED_CROSSING_WRITE_RTF: &[u8] = &[
        0x1a, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x4c, 0x5a, 0x46, 0x75, 0xe2, 0xd4, 0x4b,
        0x51, 0x41, 0x00, 0x04, 0x20, 0x57, 0x58, 0x59, 0x5a, 0x0d, 0x6e, 0x7d, 0x01, 0x0e, 0xb0,
    ];

    const UNCOMPRESSED_CROSSING_WRITE_RTF: &str = "{\\rtf1 WXYZWXYZWXYZWXYZWXYZ}";

    /// [Example 2: Reading a Token from the Dictionary that Crosses WritePosition](https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxrtfcp/421a2da5-7752-4985-8981-0f19f1e5b687)
    #[test]
    fn test_decompress_crossing_write_rtf() {
        let rtf = decompress_rtf(COMPRESSED_CROSSING_WRITE_RTF).unwrap();
        assert_eq!(rtf, UNCOMPRESSED_CROSSING_WRITE_RTF);
    }

    /// [Example 2: Compressing with Tokens that Cross WritePosition](https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxrtfcp/59eb3a35-6ee1-4a08-93b9-b9f4a7e3a0ca)
    #[test]
    fn test_compress_crossing_write_rtf() {
        let compressed = compress_rtf(UNCOMPRESSED_CROSSING_WRITE_RTF).unwrap();
        assert_eq!(compressed, COMPRESSED_CROSSING_WRITE_RTF);
    }

    /// An uncompressed container carries the `MELA` tag and decodes back to its input.
    #[test]
    fn test_encode_round_trip() {
        let encoded = encode_rtf(UNCOMPRESSED_SIMPLE_RTF).unwrap();
        assert_eq!(encoded.len(), UNCOMPRESSED_SIMPLE_RTF.len() + 16);
        assert_eq!(&encoded[8..12], b"MELA");
        assert_eq!(&encoded[16..], UNCOMPRESSED_SIMPLE_RTF.as_bytes());

        let rtf = decompress_rtf(&encoded).unwrap();
        assert_eq!(rtf, UNCOMPRESSED_SIMPLE_RTF);
    }

    /// Characters that do not fit in a single byte are rejected by both encoders.
    #[test]
    fn test_rejects_non_byte_characters() {
        let rtf = "{\\rtf1 \u{20AC}}";
        assert!(matches!(compress_rtf(rtf), Err(Error::InvalidAsciiRtf)));
        assert!(matches!(encode_rtf(rtf), Err(Error::InvalidAsciiRtf)));
    }

    /// Dropping a trailing byte makes COMPSIZE disagree with the input length.
    #[test]
    fn test_decompress_rejects_size_mismatch() {
        let truncated = &COMPRESSED_SIMPLE_RTF[..COMPRESSED_SIMPLE_RTF.len() - 1];
        assert!(matches!(
            decompress_rtf(truncated),
            Err(Error::CompressedSizeMismatch(0x2d))
        ));
    }

    /// Corrupting the payload is caught by the CRC check, which reports the stored CRC.
    #[test]
    fn test_decompress_rejects_crc_mismatch() {
        let mut corrupted = COMPRESSED_SIMPLE_RTF.to_vec();
        *corrupted.last_mut().unwrap() ^= 0xff;
        assert!(matches!(
            decompress_rtf(&corrupted),
            Err(Error::CompressedCrcMismatch(0xa7c7_c5f1))
        ));
    }

    /// A header-only container whose COMPTYPE is zero is not a known compression type.
    #[test]
    fn test_decompress_rejects_unknown_compression_type() {
        let mut header = [0_u8; 16];
        // COMPSIZE: the twelve header bytes that follow the COMPSIZE field.
        header[0] = 12;
        assert!(matches!(
            decompress_rtf(&header),
            Err(Error::InvalidCompressionType(0))
        ));
    }
}