1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate serde_json;
use std::str;
use regex::Regex;
use std::borrow::Borrow;
use serde_json::Value;
// Raw bytes of the bundled AFINN word list (a JSON object mapping each
// word to an integer sentiment weight).  An unsized slice type avoids
// hard-coding the file length, which would break compilation every time
// afinn.json is edited.
const AFFIN: &'static [u8] = include_bytes!("./afinn.json");
lazy_static! {
    // Parsed AFINN table, decoded once on first access and shared after.
    static ref AFFIN_VALUE: Value = {
        let json = str::from_utf8(AFFIN).expect("bundled afinn.json is not valid UTF-8");
        serde_json::from_str(json).expect("bundled afinn.json is not valid JSON")
    };
}
/// Sentiment measurement for a single polarity (positive or negative)
/// of an analyzed phrase.
#[derive(Debug, Clone, PartialEq)]
pub struct Sentiment {
    /// Accumulated magnitude of the matched AFINN weights (always >= 0).
    pub score: f32,
    /// `score` normalised by the number of tokens in the phrase.
    pub comparative: f32,
    /// The tokens that contributed to `score`.
    pub words: Vec<String>,
}
/// Combined result of a full sentiment analysis pass over one phrase.
pub struct Analysis {
/// Overall sentiment: positive score minus negative score.
pub score: f32,
/// Overall normalised sentiment: positive minus negative comparative.
pub comparative: f32,
/// Positive-polarity breakdown of the phrase.
pub positive: Sentiment,
/// Negative-polarity breakdown of the phrase.
pub negative: Sentiment,
}
fn tokenize_with_no_punctuation(phrase: String) -> Vec<String> {
let re = Regex::new(r"[^a-zA-Z0 -]+").unwrap();
let re2 = Regex::new(r" {2,}").unwrap();
let no_punctuation = re.replace_all(phrase.as_str(), " ");
let no_punctuation = re2.replace_all(no_punctuation.borrow(), " ");
no_punctuation.to_lowercase().split(" ").map(|s| s.to_string()).collect()
}
/// Scores the negative sentiment of `phrase`.
///
/// Each token is looked up in the AFINN table; tokens with a negative
/// weight contribute the magnitude of that weight, so the returned
/// `score` is >= 0 (larger = more negative).  `comparative` is the score
/// divided by the token count (0 when there are no tokens), and `words`
/// lists the tokens that contributed.
pub fn negativity(phrase: String) -> Sentiment {
    let tokens = tokenize_with_no_punctuation(phrase);
    let tokens_len = tokens.len() as f32;
    let mut score = 0f32;
    let mut words = Vec::new();
    for token in tokens {
        // Index with &str so the token itself does not need to be cloned.
        if let Value::Number(ref val) = AFFIN_VALUE[token.as_str()] {
            let weight = val.as_f64().unwrap() as f32;
            if weight < 0f32 {
                // Subtracting a negative weight accumulates its magnitude.
                score -= weight;
                words.push(token);
            }
        }
    }
    Sentiment {
        score: score,
        // Guard against NaN from 0/0 for an empty/all-punctuation phrase.
        comparative: if tokens_len > 0f32 { score / tokens_len } else { 0f32 },
        words: words,
    }
}
/// Scores the positive sentiment of `phrase`.
///
/// Each token is looked up in the AFINN table; tokens with a positive
/// weight add that weight to `score`.  `comparative` is the score
/// divided by the token count (0 when there are no tokens), and `words`
/// lists the tokens that contributed.
pub fn positivity(phrase: String) -> Sentiment {
    let tokens = tokenize_with_no_punctuation(phrase);
    let tokens_len = tokens.len() as f32;
    let mut score = 0f32;
    let mut words = Vec::new();
    for token in tokens {
        // Index with &str so the token itself does not need to be cloned.
        if let Value::Number(ref val) = AFFIN_VALUE[token.as_str()] {
            let weight = val.as_f64().unwrap() as f32;
            if weight > 0f32 {
                score += weight;
                words.push(token);
            }
        }
    }
    Sentiment {
        score: score,
        // Guard against NaN from 0/0 for an empty/all-punctuation phrase.
        comparative: if tokens_len > 0f32 { score / tokens_len } else { 0f32 },
        words: words,
    }
}
/// Runs a full sentiment analysis of `phrase`.
///
/// Returns the positive and negative breakdowns plus their differences:
/// `score = positive.score - negative.score` and likewise for
/// `comparative`.
pub fn analyze(phrase: String) -> Analysis {
    let neg = negativity(phrase.clone());
    // The second call can take ownership — no need for a second clone.
    let pos = positivity(phrase);
    Analysis {
        score: pos.score - neg.score,
        comparative: pos.comparative - neg.comparative,
        positive: pos,
        negative: neg,
    }
}
/// The bundled AFINN payload must decode as non-empty UTF-8.
#[test]
fn decode_affin() {
    let json = str::from_utf8(AFFIN).unwrap();
    // Idiomatic emptiness check instead of `len() != 0usize`.
    assert!(!json.is_empty());
}
/// Punctuation is stripped, case is folded, and runs of separators
/// collapse so only the three words remain.
#[test]
fn tokenize() {
    let actual = tokenize_with_no_punctuation("staRt,./ {middle//////end".to_string());
    let expected: Vec<String> = ["start", "middle", "end"]
        .iter()
        .map(|w| w.to_string())
        .collect();
    assert_eq!(actual, expected);
}
/// A phrase containing no AFINN-negative words scores zero negativity
/// and records no contributing words.
#[test]
fn test_negativity() {
    let result = negativity("I do not like jam tarts".to_string());
    assert_eq!(result.score, 0f32);
    assert!(result.words.is_empty());
}
/// "like" carries an AFINN weight of +2, and it is the only positive
/// word in the phrase.
#[test]
fn test_positivity() {
    let result = positivity("I do not like jam tarts".to_string());
    assert_eq!(result.score, 2f32);
    assert_eq!(result.words, ["like"]);
}
/// With no negative words, the overall score equals the positive score.
#[test]
fn test_analyze() {
    let phrase = "I do not like jam tarts".to_string();
    let result = analyze(phrase);
    assert_eq!(result.score, 2f32);
}