use crate::utils::find_subsequence;
use crate::SpecimenFile;
use std::fmt::{Display, Formatter};
use crate::doc::DocumentFile;
use anyhow::{bail, Result};
use chrono::{DateTime, Utc};
/// Bytes every RTF document must begin with: `{\rt`.
const RTF_MAGIC: [u8; 4] = *b"{\\rt";

/// Byte sequence that opens the title entry of the info group: `{\info{\title`.
const TITLE_HEADER: [u8; 13] = *b"{\\info{\\title";

/// Control word mapped to [`CharacterSet::Ansi`]: `\ansi`.
const ANSI_CHARSET: [u8; 5] = *b"\\ansi";

/// Control word mapped to [`CharacterSet::MacAnsi`]: `\mac`.
const MAC_CHARSET: [u8; 4] = *b"\\mac";

/// Control word mapped to [`CharacterSet::Pc`]: `\pc`.
const PC_CHARSET: [u8; 3] = *b"\\pc";

/// Control word mapped to [`CharacterSet::Pca`]: `\pca`.
const PCA_CHARSET: [u8; 4] = *b"\\pca";

/// Character set declared in the header of an RTF document.
#[derive(Copy, Clone, Debug)]
pub enum CharacterSet {
    /// Declared with the `\ansi` control word.
    Ansi,

    /// Declared with the `\mac` control word.
    MacAnsi,

    /// Declared with the `\pc` control word.
    Pc,

    /// Declared with the `\pca` control word.
    Pca,
}
/// Metadata extracted from an RTF document, together with the borrowed bytes it came from.
#[derive(Clone, Debug)]
pub struct Rtf<'a> {
    /// Character set declared near the start of the document, if one was recognised.
    pub character_set: Option<CharacterSet>,

    /// Document title taken from the info group, if one was found and is valid UTF-8.
    pub title: Option<String>,

    /// The complete, unmodified bytes of the document.
    pub contents: &'a [u8],
}
impl<'a> Rtf<'a> {
pub fn from(contents: &'a [u8]) -> Result<Self> {
let doc_front = &contents[..40.min(contents.len())];
if !doc_front.starts_with(&RTF_MAGIC) {
bail!("not an RTF");
}
let mut character_set = None;
if find_subsequence(doc_front, &ANSI_CHARSET).is_some() {
character_set = Some(CharacterSet::Ansi);
} else if find_subsequence(doc_front, &MAC_CHARSET).is_some() {
character_set = Some(CharacterSet::MacAnsi);
} else if find_subsequence(doc_front, &PCA_CHARSET).is_some() {
character_set = Some(CharacterSet::Pca);
} else if find_subsequence(doc_front, &PC_CHARSET).is_some() {
character_set = Some(CharacterSet::Pc);
}
let mut title = None;
if contents.len() < 100000 {
title = if let Some(start_index) = find_subsequence(contents, &TITLE_HEADER) {
let start_index = start_index + TITLE_HEADER.len() + 1; let mut end_index = start_index;
while contents[end_index] != 0x7D
&& end_index < start_index + 200
&& end_index < contents.len()
{
end_index += 1;
}
if end_index < contents.len() && end_index > start_index + 1 {
if let Ok(title) = String::from_utf8(contents[start_index..end_index].to_vec())
{
Some(title)
} else {
None }
} else {
None }
} else {
None };
}
Ok(Self {
character_set,
title,
contents,
})
}
}
impl<'a> DocumentFile for Rtf<'a> {
    /// Page count is not extracted for RTF documents; always `0`.
    fn pages(&self) -> u32 {
        0
    }

    /// Author is not extracted for RTF documents; always `None`.
    fn author(&self) -> Option<String> {
        None
    }

    /// Returns a copy of the title stored in the `title` field, if any.
    fn title(&self) -> Option<String> {
        self.title.clone()
    }

    /// JavaScript detection is not performed for RTF documents; always `false`.
    fn has_javascript(&self) -> bool {
        false
    }

    /// Form detection is not performed for RTF documents; always `false`.
    fn has_form(&self) -> bool {
        false
    }

    /// Creation time is not extracted for RTF documents; always `None`.
    fn creation_time(&self) -> Option<DateTime<Utc>> {
        None
    }

    /// Modification time is not extracted for RTF documents; always `None`.
    fn modification_time(&self) -> Option<DateTime<Utc>> {
        None
    }
}
impl SpecimenFile for Rtf<'_> {
    /// Magic byte sequences for this file type: only `RTF_MAGIC`.
    const MAGIC: &'static [&'static [u8]] = &[&RTF_MAGIC];

    /// Short name of this file type.
    fn type_name(&self) -> &'static str {
        "RTF"
    }
}
impl Display for Rtf<'_> {
    /// Writes a one-line summary of the form
    /// `RTF, Title: "<title>", Character Set: <set>, Bytes: <len>`.
    ///
    /// The title and character-set parts are left out when the respective field is `None`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str("RTF")?;

        if let Some(title) = self.title.as_deref() {
            write!(f, ", Title: \"{title}\"")?;
        }

        if let Some(charset) = self.character_set {
            write!(f, ", Character Set: {charset:?}")?;
        }

        let byte_count = self.contents.len();
        write!(f, ", Bytes: {}", byte_count)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed document parses and yields the title from its info group.
    #[test]
    fn rtf() {
        const BYTES: &[u8] = include_bytes!("../../testdata/rtf/hello.rtf");

        let rtf = Rtf::from(BYTES).expect("hello.rtf should parse as RTF");
        println!("RTF: {rtf}");
        assert_eq!(rtf.title.as_deref(), Some("RTF Title"));
    }

    /// A document without a title still parses and reports no title.
    #[test]
    fn empty() {
        const BYTES: &[u8] = include_bytes!("../../testdata/rtf/empty.rtf");

        let rtf = Rtf::from(BYTES).expect("empty.rtf should parse as RTF");
        assert!(rtf.title.is_none());
    }
}