1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate serde_json;

use std::str;
use regex::Regex;
use std::borrow::Borrow;
use serde_json::Value;

// Embed the contents of `afinn.json` in the compiled binary.
//
// Typed as a byte slice rather than a fixed-length array so that editing
// `afinn.json` does not require keeping a hard-coded byte count in sync here.
const AFFIN: &'static [u8] = include_bytes!("./afinn.json");

lazy_static! {
    // The embedded `afinn.json` bytes parsed into a JSON value.
    //
    // Both steps can only fail if the file that was compiled in is malformed,
    // which is a build-time mistake rather than a runtime condition, so the
    // panic messages state the invariant that was violated.
    static ref AFFIN_VALUE: Value = {
        let json = str::from_utf8(AFFIN).expect("embedded afinn.json must be valid UTF-8");
        serde_json::from_str(json).expect("embedded afinn.json must be valid JSON")
    };
}

/// Outcome of scoring a phrase for a single polarity (positive or negative).
#[derive(Debug, Clone, PartialEq)]
pub struct Sentiment {
    /// The sentiment score for this polarity
    pub score: f32,
    /// The score divided by the total number of tokens analysed
    pub comparative: f32,
    /// The words that contributed to the score
    pub words: Vec<String>,
}

/// Outcome of a full analysis, combining both polarities.
#[derive(Debug, Clone, PartialEq)]
pub struct Analysis {
    /// The overall score: positive score minus negative score
    pub score: f32,
    /// The overall comparative: positive comparative minus negative comparative
    pub comparative: f32,
    /// Breakdown of the positive contribution
    pub positive: Sentiment,
    /// Breakdown of the negative contribution
    pub negative: Sentiment,
}

/// Splits a phrase into lowercase word tokens, discarding punctuation.
///
/// Every run of characters other than ASCII letters, the digit `0`, spaces and
/// hyphens is replaced by a single space, repeated spaces are collapsed, and
/// the lowercased result is split on single spaces.
///
/// Leading and trailing separators are trimmed before splitting, so a phrase
/// such as `"great!"` yields `["great"]` rather than `["great", ""]`. Those
/// stray empty tokens would otherwise inflate the token count that callers
/// divide by. A phrase with nothing tokenizable still yields a single empty
/// token, so the returned vector is never empty.
fn tokenize_with_no_punctuation(phrase: String) -> Vec<String> {
    // NOTE(review): the class keeps only the digit `0`; `0-9` may have been
    // intended. Left as-is because widening it changes tokenization — confirm.
    let re = Regex::new(r"[^a-zA-Z0 -]+").unwrap();
    let re2 = Regex::new(r" {2,}").unwrap();

    let no_punctuation = re.replace_all(phrase.as_str(), " ");
    let no_punctuation = re2.replace_all(no_punctuation.borrow(), " ");

    no_punctuation
        .to_lowercase()
        // At most one separator remains at each end after the collapse above.
        .trim()
        .split(" ")
        .map(|s| s.to_string())
        .collect()
}

/// Calculates the negativity of a sentence.
///
/// The phrase is tokenized and each token is looked up in the embedded
/// lexicon. Every token whose lexicon value is below zero adds the magnitude
/// of that value to the score and is recorded in `words`.
///
/// Returns a `Sentiment` whose `comparative` is the score divided by the
/// number of tokens in the phrase.
pub fn negativity(phrase: String) -> Sentiment {
    let tokens = tokenize_with_no_punctuation(phrase);
    let token_count = tokens.len() as f32;
    let mut score = 0f32;
    let mut words = Vec::new();

    for token in tokens {
        // Looking up by `&str` leaves the token owned so it can be moved into
        // `words` without cloning. Tokens that do not map to a number are skipped.
        let weight = match AFFIN_VALUE[token.as_str()].as_f64() {
            Some(value) => value as f32,
            None => continue,
        };
        if weight < 0f32 {
            // `weight` is negative here, so subtracting accumulates its magnitude.
            score -= weight;
            words.push(token);
        }
    }

    Sentiment {
        score: score,
        comparative: score / token_count,
        words: words,
    }
}

/// Calculates the positivity of a sentence.
///
/// The phrase is tokenized and each token is looked up in the embedded
/// lexicon. Every token whose lexicon value is above zero adds that value to
/// the score and is recorded in `words`.
///
/// Returns a `Sentiment` whose `comparative` is the score divided by the
/// number of tokens in the phrase.
pub fn positivity(phrase: String) -> Sentiment {
    let tokens = tokenize_with_no_punctuation(phrase);
    let token_count = tokens.len() as f32;
    let mut score = 0f32;
    let mut words = Vec::new();

    for token in tokens {
        // Looking up by `&str` leaves the token owned so it can be moved into
        // `words` without cloning. Tokens that do not map to a number are skipped.
        let weight = match AFFIN_VALUE[token.as_str()].as_f64() {
            Some(value) => value as f32,
            None => continue,
        };
        if weight > 0f32 {
            score += weight;
            words.push(token);
        }
    }

    Sentiment {
        score: score,
        comparative: score / token_count,
        words: words,
    }
}

/// Calculates the overall sentiment of a sentence.
///
/// Runs both the negative and the positive scoring over the phrase and
/// combines them: the overall `score` and `comparative` are the positive
/// values minus the negative ones. The per-polarity results are returned
/// alongside in `positive` and `negative`.
pub fn analyze(phrase: String) -> Analysis {
    // Both scorers take ownership of their input, so the first call gets a
    // copy and the second consumes the original.
    let neg = negativity(phrase.clone());
    let pos = positivity(phrase);

    Analysis {
        score: pos.score - neg.score,
        comparative: pos.comparative - neg.comparative,
        positive: pos,
        negative: neg,
    }
}

#[test]
fn decode_affin() {
    // The embedded lexicon bytes must decode as UTF-8 and must not be empty.
    let decoded = str::from_utf8(AFFIN).unwrap();
    assert!(!decoded.is_empty());
}

#[test]
fn tokenize() {
    // Mixed case, punctuation runs and repeated spaces all collapse to clean
    // lowercase words.
    let input = String::from("staRt,./     {middle//////end");
    let expected: Vec<String> = ["start", "middle", "end"]
        .iter()
        .map(|word| word.to_string())
        .collect();
    assert_eq!(tokenize_with_no_punctuation(input), expected);
}

#[test]
fn test_negativity() {
    // The phrase is expected to contain no negatively scored words.
    let result = negativity(String::from("I do not like jam tarts"));
    assert_eq!(result.score, 0.0_f32);
    assert!(result.words.is_empty());
}

#[test]
fn test_positivity() {
    // "like" is expected to be the only positively scored word, worth 2.
    let result = positivity(String::from("I do not like jam tarts"));
    assert_eq!(result.score, 2.0_f32);
    assert_eq!(result.words, vec![String::from("like")]);
}

#[test]
fn test_analyze() {
    // Overall score is the positive score minus the negative score.
    let phrase = String::from("I do not like jam tarts");
    let result = analyze(phrase);
    assert_eq!(result.score, 2.0_f32);
}