malwaredb_types/doc/
rtf.rs1use crate::utils::find_subsequence;
4use crate::SpecimenFile;
5
6use std::fmt::{Display, Formatter};
7
8use crate::doc::DocumentFile;
9use anyhow::{bail, Result};
10use chrono::{DateTime, Utc};
11use tracing::instrument;
12
/// Leading bytes every RTF document must start with: `{\rt`.
const RTF_MAGIC: [u8; 4] = *b"{\\rt";

/// Byte sequence that opens the title entry of the info group: `{\info{\title`.
const TITLE_HEADER: [u8; 13] = *b"{\\info{\\title";

/// Control word `\ansi`.
const ANSI_CHARSET: [u8; 5] = *b"\\ansi";

/// Control word `\mac`.
const MAC_CHARSET: [u8; 4] = *b"\\mac";

/// Control word `\pc`. Note that this is a prefix of [`PCA_CHARSET`].
const PC_CHARSET: [u8; 3] = *b"\\pc";

/// Control word `\pca`.
const PCA_CHARSET: [u8; 4] = *b"\\pca";

/// Character set declared by a control word near the start of an RTF document.
#[derive(Copy, Clone, Debug)]
pub enum CharacterSet {
    /// Declared with `\ansi` (ANSI, per the RTF specification).
    Ansi,

    /// Declared with `\mac` (Apple Macintosh, per the RTF specification).
    MacAnsi,

    /// Declared with `\pc` (IBM PC code page 437, per the RTF specification).
    Pc,

    /// Declared with `\pca` (IBM PC code page 850, per the RTF specification).
    Pca,
}
42
43#[derive(Clone, Debug)]
45pub struct Rtf<'a> {
46 pub character_set: Option<CharacterSet>,
48
49 pub title: Option<String>,
51
52 pub contents: &'a [u8],
54}
55
56impl<'a> Rtf<'a> {
57 #[instrument(name = "RTF parser", skip(contents))]
63 pub fn from(contents: &'a [u8]) -> Result<Self> {
64 let doc_front = &contents[..40.min(contents.len())];
65
66 if !doc_front.starts_with(&RTF_MAGIC) {
67 bail!("not an RTF");
68 }
69
70 let mut character_set = None;
71 if find_subsequence(doc_front, &ANSI_CHARSET).is_some() {
72 character_set = Some(CharacterSet::Ansi);
73 } else if find_subsequence(doc_front, &MAC_CHARSET).is_some() {
74 character_set = Some(CharacterSet::MacAnsi);
75 } else if find_subsequence(doc_front, &PCA_CHARSET).is_some() {
76 character_set = Some(CharacterSet::Pca);
77 } else if find_subsequence(doc_front, &PC_CHARSET).is_some() {
78 character_set = Some(CharacterSet::Pc);
79 }
80
81 let mut title = None;
82 if contents.len() < 100_000 {
83 title = if let Some(start_index) = find_subsequence(contents, &TITLE_HEADER) {
84 let start_index = start_index + TITLE_HEADER.len() + 1; let mut end_index = start_index;
86 while contents[end_index] != 0x7D
87 && end_index < start_index + 200
88 && end_index < contents.len()
89 {
90 end_index += 1;
91 }
92 if end_index < contents.len() && end_index > start_index + 1 {
93 String::from_utf8(contents[start_index..end_index].to_vec()).ok()
94 } else {
95 None }
97 } else {
98 None };
100 }
101
102 Ok(Self {
103 character_set,
104 title,
105 contents,
106 })
107 }
108}
109
110impl DocumentFile for Rtf<'_> {
112 fn pages(&self) -> u32 {
113 0
114 }
115
116 fn author(&self) -> Option<String> {
117 None
118 }
119
120 fn title(&self) -> Option<String> {
121 None
122 }
123
124 fn has_javascript(&self) -> bool {
125 false }
127
128 fn has_form(&self) -> bool {
129 false
130 }
131
132 fn creation_time(&self) -> Option<DateTime<Utc>> {
133 None
134 }
135
136 fn modification_time(&self) -> Option<DateTime<Utc>> {
137 None
138 }
139}
140
141impl SpecimenFile for Rtf<'_> {
142 const MAGIC: &'static [&'static [u8]] = &[&RTF_MAGIC];
143
144 fn type_name(&self) -> &'static str {
145 "RTF"
146 }
147}
148
149impl Display for Rtf<'_> {
150 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
151 write!(f, "RTF")?;
152 if let Some(title) = &self.title {
153 write!(f, ", Title: \"{title}\"")?;
154 }
155 if let Some(charset) = &self.character_set {
156 write!(f, ", Character Set: {charset:?}")?;
157 }
158 write!(f, ", Bytes: {}", self.contents.len())
159 }
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// A document with an info-group title parses and exposes that title.
    #[test]
    fn rtf() {
        const BYTES: &[u8] = include_bytes!("../../testdata/rtf/hello.rtf");

        let rtf = Rtf::from(BYTES).expect("hello.rtf should parse as RTF");
        println!("RTF: {rtf}");
        assert_eq!(rtf.title.as_deref(), Some("RTF Title"));
    }

    /// A document without a title entry parses with `title` left unset.
    #[test]
    fn empty() {
        const BYTES: &[u8] = include_bytes!("../../testdata/rtf/empty.rtf");

        let rtf = Rtf::from(BYTES).expect("empty.rtf should parse as RTF");
        assert!(rtf.title.is_none());
    }
}