#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate serde_json;
use std::str;
use regex::Regex;
use std::borrow::Borrow;
use serde_json::Value;
/// Raw bytes of the AFINN word list, embedded into the binary at compile time.
///
/// Typed as a slice rather than a fixed-length array so that editing
/// `afinn.json` does not require updating a byte count here.
const AFFIN: &'static [u8] = include_bytes!("./afinn.json");
// AFINN lexicon, parsed from the embedded JSON the first time it is accessed.
lazy_static! {
    static ref AFFIN_VALUE: Value = {
        // The payload is compiled into the binary, so a failure here means the
        // bundled file itself is broken rather than anything a caller passed in.
        let json = str::from_utf8(AFFIN).expect("embedded afinn.json is not valid UTF-8");
        serde_json::from_str(json).expect("embedded afinn.json is not valid JSON")
    };
}
/// Result of scoring a phrase for a single polarity (positive or negative).
#[derive(Debug, Clone, PartialEq)]
pub struct Sentiment {
    /// Accumulated magnitude of the matched words' lexicon values.
    pub score: f32,
    /// `score` divided by the number of tokens in the phrase.
    pub comparative: f32,
    /// Tokens that contributed to `score`, in the order they were encountered.
    pub words: Vec<String>,
}

/// Combined positive and negative scoring of a phrase.
#[derive(Debug, Clone, PartialEq)]
pub struct Analysis {
    /// Positive score minus negative score.
    pub score: f32,
    /// Positive comparative minus negative comparative.
    pub comparative: f32,
    /// Breakdown of the positive words found.
    pub positive: Sentiment,
    /// Breakdown of the negative words found.
    pub negative: Sentiment,
}
fn tokenize_with_no_punctuation(phrase: String) -> Vec<String> {
let re = Regex::new(r"[^a-zA-Z0 -]+").unwrap();
let re2 = Regex::new(r" {2,}").unwrap();
let no_punctuation = re.replace_all(phrase.as_str(), " ");
let no_punctuation = re2.replace_all(no_punctuation.borrow(), " ");
no_punctuation.to_lowercase().split(" ").map(|s| s.to_string()).collect()
}
/// Scores the negative words of `phrase` against the AFINN lexicon.
///
/// The phrase is tokenized with `tokenize_with_no_punctuation`. Each token
/// whose lexicon value is a number below zero adds the magnitude of that
/// value to the score and is recorded in `words`.
///
/// Returns a [`Sentiment`] where `score` is the accumulated magnitude and
/// `comparative` is `score` divided by the total number of tokens.
pub fn negativity(phrase: String) -> Sentiment {
    let tokens = tokenize_with_no_punctuation(phrase);
    let tokens_len = tokens.len() as f32;
    let mut score = 0f32;
    let mut words = Vec::new();
    for token in tokens {
        // Look up by borrowed key: tokens missing from the lexicon (or mapped
        // to a non-number) are skipped without allocating a copy.
        let valence = match AFFIN_VALUE[token.as_str()].as_f64() {
            Some(v) => v as f32,
            None => continue,
        };
        if valence < 0f32 {
            // Subtracting a negative value accumulates its magnitude.
            score -= valence;
            words.push(token);
        }
    }
    Sentiment {
        score: score,
        comparative: score / tokens_len,
        words: words,
    }
}
/// Scores the positive words of `phrase` against the AFINN lexicon.
///
/// The phrase is tokenized with `tokenize_with_no_punctuation`. Each token
/// whose lexicon value is a number above zero adds that value to the score
/// and is recorded in `words`.
///
/// Returns a [`Sentiment`] where `score` is the accumulated value and
/// `comparative` is `score` divided by the total number of tokens.
pub fn positivity(phrase: String) -> Sentiment {
    let tokens = tokenize_with_no_punctuation(phrase);
    let tokens_len = tokens.len() as f32;
    let mut score = 0f32;
    let mut words = Vec::new();
    for token in tokens {
        // Look up by borrowed key: tokens missing from the lexicon (or mapped
        // to a non-number) are skipped without allocating a copy.
        let valence = match AFFIN_VALUE[token.as_str()].as_f64() {
            Some(v) => v as f32,
            None => continue,
        };
        if valence > 0f32 {
            score += valence;
            words.push(token);
        }
    }
    Sentiment {
        score: score,
        comparative: score / tokens_len,
        words: words,
    }
}
/// Runs both the negative and the positive scoring over `phrase`.
///
/// The returned [`Analysis`] carries the two individual results, plus
/// `score` and `comparative` computed as the positive value minus the
/// negative one.
pub fn analyze(phrase: String) -> Analysis {
    // Only the first call needs a copy; the second can take ownership.
    let neg = negativity(phrase.clone());
    let pos = positivity(phrase);
    Analysis {
        score: pos.score - neg.score,
        comparative: pos.comparative - neg.comparative,
        positive: pos,
        negative: neg,
    }
}
// The embedded lexicon bytes must decode as UTF-8 and must not be empty.
#[test]
fn decode_affin() {
    let decoded = str::from_utf8(AFFIN).unwrap();
    assert!(!decoded.is_empty());
}
// Mixed case and runs of punctuation collapse into clean lowercase tokens.
#[test]
fn tokenize() {
    let expected: Vec<String> = ["start", "middle", "end"]
        .iter()
        .map(|word| word.to_string())
        .collect();
    let actual = tokenize_with_no_punctuation(String::from("staRt,./ {middle//////end"));
    assert_eq!(actual, expected);
}
// The sample phrase contains no negative lexicon words.
#[test]
fn test_negativity() {
    let result = negativity(String::from("I do not like jam tarts"));
    let no_words: Vec<String> = Vec::new();
    assert_eq!(result.score, 0f32);
    assert_eq!(result.words, no_words);
}
// "like" is the only positive lexicon word in the sample phrase.
#[test]
fn test_positivity() {
    let result = positivity(String::from("I do not like jam tarts"));
    assert_eq!(result.score, 2f32);
    assert_eq!(result.words, vec!["like"]);
}
// The overall score is the positive score minus the negative score.
#[test]
fn test_analyze() {
    let result = analyze(String::from("I do not like jam tarts"));
    assert_eq!(result.score, 2f32);
}