use std::collections::HashMap;
use errors::ArmorlibError;
use scan_module::ScanModule;
use scan_object::ScanObject;
use finding::{Finding, Severity};
/// Scan module that reports zero-width Unicode characters found in an
/// object's text, since such characters can be used to watermark data.
///
/// The struct carries no state; all behavior lives in its `ScanModule` impl.
pub struct UnicodeWatermarkScanModule;
/// Builds the lookup table of zero-width Unicode characters that are
/// reported, keyed by character and mapped to the title used for the finding.
///
/// A fresh map is constructed on every call.
fn watermark_chars() -> HashMap<char, &'static str> {
    let table: [(char, &'static str); 4] = [
        (
            '\u{200b}',
            "Unicode zero-width space: most likely suspicious",
        ),
        (
            '\u{feff}',
            "Unicode zero-width no-break space: most likely suspicious",
        ),
        (
            '\u{200c}',
            "Unicode zero-width non-joiner: could be OK, possibly suspicious",
        ),
        (
            '\u{200d}',
            "Unicode zero-width joiner: could be OK, possibly suspicious",
        ),
    ];
    // The pairs are `Copy`, so cloning them out of the array is free.
    table.iter().cloned().collect()
}
/// Returns a short excerpt of `text` around byte offset `i`, for use in
/// human-readable finding descriptions.
///
/// The excerpt contains every character whose starting byte offset `j`
/// satisfies `i - 20 <= j < i + 20` (bounds saturate instead of overflowing).
/// The character that starts exactly at `i` is not copied; the marker
/// `_[HERE]_` is emitted in its place.
///
/// Offsets are byte offsets as produced by `str::char_indices`, not character
/// counts. The window end is therefore tested with `<` rather than `==`: a
/// multi-byte character may straddle byte `i + 20`, in which case no character
/// starts exactly there and an equality test would never close the window,
/// dumping the remainder of the text into the excerpt.
///
/// Returns an empty string when no character of `text` starts inside the
/// window (for example when `text` is empty).
fn surrounding_text(i: usize, text: &str) -> String {
    /// Number of bytes of context kept on each side of `i`.
    const CONTEXT_BYTES: usize = 20;

    let window_start = i.saturating_sub(CONTEXT_BYTES);
    let window_end = i.saturating_add(CONTEXT_BYTES);

    let mut surround = String::new();
    let window = text
        .char_indices()
        .skip_while(|&(j, _)| j < window_start)
        // Stop scanning as soon as the window is passed instead of walking
        // the rest of the text.
        .take_while(|&(j, _)| j < window_end);
    for (j, character) in window {
        if j == i {
            surround.push_str("_[HERE]_");
        } else {
            surround.push(character);
        }
    }
    surround
}
impl ScanModule for UnicodeWatermarkScanModule {
    /// Scans the object's text for characters listed by `watermark_chars`.
    ///
    /// Reads the `text/encoding` metadata first and returns no findings when
    /// it is `"binary"`. Otherwise reads `text/text` and emits one `Finding`
    /// per matching character, in order of appearance. The index quoted in
    /// each description is the byte offset yielded by `char_indices`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `get_metadata` for either key.
    fn scan(&self, scan_object: &ScanObject) -> Result<Vec<Finding>, ArmorlibError> {
        let encoding: &String = scan_object.get_metadata("text/encoding")?;
        if encoding.as_str() == "binary" {
            return Ok(Vec::new());
        }

        let text = scan_object.get_metadata("text/text")?;
        let watermarks = watermark_chars();

        let findings: Vec<Finding> = text
            .char_indices()
            .filter_map(|(offset, character)| {
                watermarks.get(&character).map(|warn_text| Finding {
                    title: (*warn_text).to_owned(),
                    description: format!(
                        "found suspicious character at index {}: \"{}\"",
                        offset,
                        surrounding_text(offset, text.as_str())
                    ),
                    id: String::from("UNICODE_WATERMARK"),
                    severity: Severity::Warn(String::from(
                        "possible attempt to watermark data",
                    )),
                })
            })
            .collect();

        Ok(findings)
    }

    /// Returns the module's `(name, description)` pair.
    fn info(&self) -> (&'static str, &'static str) {
        let name = "unicode_watermark";
        let description = "searches for attempts to watermark text using unusual Unicode";
        (name, description)
    }

    /// Lists the preprocessors this module depends on: only `"text"`
    /// (presumably the one that supplies the `text/*` metadata read by
    /// `scan`; confirm against the preprocessor registry).
    fn required_preprocessors(&self) -> Vec<&'static str> {
        vec!["text"]
    }
}
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::*;
    use binary_object::BinaryObject;
    use coordinator::process;

    /// U+FEFF and U+200B are each reported, in order of appearance, with the
    /// expected title, id, severity and excerpt.
    #[test]
    fn test_zero_width_space_char() {
        // The zero-width characters are written as explicit escapes: raw
        // invisible characters in a literal are indistinguishable from their
        // absence and are easily stripped by editors and tooling.
        // U+FEFF sits at byte 3 and U+200B at byte 14, matching the indices
        // asserted below.
        let sus_string = "The\u{feff} nuclear\u{200b} launch codes are 0000, 0001, and 1234.";
        let mut scan_result = process(
            vec![Box::new(UnicodeWatermarkScanModule)],
            Vec::new(),
            BinaryObject::from(sus_string.as_bytes().to_vec()),
            None,
        ).unwrap();
        let scan_report = scan_result.reports.pop().unwrap();
        assert_eq!(scan_report.module_info.0.as_str(), "unicode_watermark");
        assert_eq!(
            scan_report.module_info.1.as_str(),
            "searches for attempts to watermark text using unusual Unicode"
        );
        let findings = scan_report.findings.unwrap();
        let finding1 = findings.get(0).unwrap();
        let finding2 = findings.get(1).unwrap();
        assert_eq!(
            finding1.title,
            "Unicode zero-width no-break space: most likely suspicious"
        );
        assert_eq!(
            finding2.title,
            "Unicode zero-width space: most likely suspicious"
        );
        assert_eq!(finding1.id, "UNICODE_WATERMARK");
        assert_eq!(finding1.severity, finding2.severity);
        assert_eq!(
            finding1.severity,
            Severity::Warn(String::from("possible attempt to watermark data"))
        );
        assert_eq!(
            finding1.description,
            "found suspicious character at index 3: \"The_[HERE]_ nuclear\u{200b} launc\""
        );
        assert_eq!(
            finding2.description,
            "found suspicious character at index 14: \"The\u{feff} nuclear_[HERE]_ launch codes are\""
        );
    }

    /// U+200C and U+200D are each reported, in order of appearance, with the
    /// expected title, id, severity and excerpt.
    #[test]
    fn test_zero_width_joiner_non_joiner() {
        // Explicit escapes again: U+200C sits at byte 3 and U+200D at byte 42,
        // matching the indices asserted below.
        let sus_string = "The\u{200c} nuclear launch codes are 0000, 0001\u{200d}, and 1234";
        let mut scan_result = process(
            vec![Box::new(UnicodeWatermarkScanModule)],
            Vec::new(),
            BinaryObject::from(sus_string.as_bytes().to_vec()),
            None,
        ).unwrap();
        let scan_report = scan_result.reports.pop().unwrap();
        assert_eq!(scan_report.module_info.0.as_str(), "unicode_watermark");
        assert_eq!(
            scan_report.module_info.1.as_str(),
            "searches for attempts to watermark text using unusual Unicode"
        );
        let findings = scan_report.findings.unwrap();
        let finding1 = findings.get(0).unwrap();
        let finding2 = findings.get(1).unwrap();
        assert_eq!(
            finding1.title,
            "Unicode zero-width non-joiner: could be OK, possibly suspicious"
        );
        assert_eq!(
            finding2.title,
            "Unicode zero-width joiner: could be OK, possibly suspicious"
        );
        assert_eq!(finding1.id, "UNICODE_WATERMARK");
        assert_eq!(finding1.severity, finding2.severity);
        assert_eq!(
            finding1.severity,
            Severity::Warn(String::from("possible attempt to watermark data"))
        );
        assert_eq!(
            finding1.description,
            "found suspicious character at index 3: \"The_[HERE]_ nuclear launch c\""
        );
        assert_eq!(
            finding2.description,
            "found suspicious character at index 42: \"codes are 0000, 0001_[HERE]_, and 1234\""
        );
    }
}