malwaredb_types/doc/
rtf.rs1use crate::utils::find_subsequence;
4use crate::SpecimenFile;
5
6use std::fmt::{Display, Formatter};
7
8use crate::doc::DocumentFile;
9use anyhow::{bail, Result};
10use chrono::{DateTime, Utc};
11use tracing::instrument;
12
/// Leading bytes of every RTF document: `{\rt`.
const RTF_MAGIC: [u8; 4] = *b"{\\rt";

/// Byte sequence which opens the info group's title entry: `{\info{\title`.
const TITLE_HEADER: [u8; 13] = *b"{\\info{\\title";

/// Control word `\ansi`.
const ANSI_CHARSET: [u8; 5] = *b"\\ansi";

/// Control word `\mac`.
const MAC_CHARSET: [u8; 4] = *b"\\mac";

/// Control word `\pc`.
const PC_CHARSET: [u8; 3] = *b"\\pc";

/// Control word `\pca`. Note that `\pc` is a prefix of this sequence, so `\pca` has to be
/// tested first when both are candidates.
const PCA_CHARSET: [u8; 4] = *b"\\pca";

/// Character set declared in the header of an RTF document.
#[derive(Copy, Clone, Debug)]
pub enum CharacterSet {
    /// Declared with the `\ansi` control word.
    Ansi,

    /// Declared with the `\mac` control word.
    MacAnsi,

    /// Declared with the `\pc` control word.
    Pc,

    /// Declared with the `\pca` control word.
    Pca,
}
42
43#[derive(Clone, Debug)]
45pub struct Rtf<'a> {
46 pub character_set: Option<CharacterSet>,
48
49 pub title: Option<String>,
51
52 pub contents: &'a [u8],
54}
55
56impl<'a> Rtf<'a> {
57 #[instrument(name = "RTF parser", skip(contents))]
59 pub fn from(contents: &'a [u8]) -> Result<Self> {
60 let doc_front = &contents[..40.min(contents.len())];
61
62 if !doc_front.starts_with(&RTF_MAGIC) {
63 bail!("not an RTF");
64 }
65
66 let mut character_set = None;
67 if find_subsequence(doc_front, &ANSI_CHARSET).is_some() {
68 character_set = Some(CharacterSet::Ansi);
69 } else if find_subsequence(doc_front, &MAC_CHARSET).is_some() {
70 character_set = Some(CharacterSet::MacAnsi);
71 } else if find_subsequence(doc_front, &PCA_CHARSET).is_some() {
72 character_set = Some(CharacterSet::Pca);
73 } else if find_subsequence(doc_front, &PC_CHARSET).is_some() {
74 character_set = Some(CharacterSet::Pc);
75 }
76
77 let mut title = None;
78 if contents.len() < 100_000 {
79 title = if let Some(start_index) = find_subsequence(contents, &TITLE_HEADER) {
80 let start_index = start_index + TITLE_HEADER.len() + 1; let mut end_index = start_index;
82 while contents[end_index] != 0x7D
83 && end_index < start_index + 200
84 && end_index < contents.len()
85 {
86 end_index += 1;
87 }
88 if end_index < contents.len() && end_index > start_index + 1 {
89 String::from_utf8(contents[start_index..end_index].to_vec()).ok()
90 } else {
91 None }
93 } else {
94 None };
96 }
97
98 Ok(Self {
99 character_set,
100 title,
101 contents,
102 })
103 }
104}
105
106impl DocumentFile for Rtf<'_> {
108 fn pages(&self) -> u32 {
109 0
110 }
111
112 fn author(&self) -> Option<String> {
113 None
114 }
115
116 fn title(&self) -> Option<String> {
117 None
118 }
119
120 fn has_javascript(&self) -> bool {
121 false }
123
124 fn has_form(&self) -> bool {
125 false
126 }
127
128 fn creation_time(&self) -> Option<DateTime<Utc>> {
129 None
130 }
131
132 fn modification_time(&self) -> Option<DateTime<Utc>> {
133 None
134 }
135}
136
137impl SpecimenFile for Rtf<'_> {
138 const MAGIC: &'static [&'static [u8]] = &[&RTF_MAGIC];
139
140 fn type_name(&self) -> &'static str {
141 "RTF"
142 }
143}
144
145impl Display for Rtf<'_> {
146 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
147 write!(f, "RTF")?;
148 if let Some(title) = &self.title {
149 write!(f, ", Title: \"{title}\"")?;
150 }
151 if let Some(charset) = &self.character_set {
152 write!(f, ", Character Set: {charset:?}")?;
153 }
154 write!(f, ", Bytes: {}", self.contents.len())
155 }
156}
157
#[cfg(test)]
mod tests {
    use super::*;

    /// A document with a title in its info group parses and yields that title.
    #[test]
    fn rtf() {
        const BYTES: &[u8] = include_bytes!("../../testdata/rtf/hello.rtf");

        let rtf = Rtf::from(BYTES).expect("hello.rtf should parse as RTF");
        println!("RTF: {rtf}");
        assert_eq!(rtf.title.as_deref(), Some("RTF Title"));
    }

    /// A document without a title parses and yields no title.
    #[test]
    fn empty() {
        const BYTES: &[u8] = include_bytes!("../../testdata/rtf/empty.rtf");

        let rtf = Rtf::from(BYTES).expect("empty.rtf should parse as RTF");
        assert!(rtf.title.is_none());
    }
}