use crate::error::{Result, TextError};
use crate::tokenize::{Tokenizer, WordTokenizer};
use std::collections::HashMap;
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Sentiment {
Positive,
Negative,
Neutral,
}
impl std::fmt::Display for Sentiment {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Sentiment::Positive => write!(f, "Positive"),
Sentiment::Negative => write!(f, "Negative"),
Sentiment::Neutral => write!(f, "Neutral"),
}
}
}
impl Sentiment {
pub fn to_score(&self) -> f64 {
match self {
Sentiment::Positive => 1.0,
Sentiment::Neutral => 0.0,
Sentiment::Negative => -1.0,
}
}
pub fn from_score(score: f64) -> Self {
if score > 0.05 {
Sentiment::Positive
} else if score < -0.05 {
Sentiment::Negative
} else {
Sentiment::Neutral
}
}
}
#[derive(Debug, Clone)]
pub struct SentimentResult {
pub sentiment: Sentiment,
pub score: f64,
pub confidence: f64,
pub word_counts: SentimentWordCounts,
}
#[derive(Debug, Clone, Default)]
pub struct SentimentWordCounts {
pub positive_words: usize,
pub negative_words: usize,
pub neutral_words: usize,
pub total_words: usize,
}
#[derive(Debug, Clone)]
pub struct SentimentLexicon {
lexicon: HashMap<String, f64>,
default_score: f64,
}
impl SentimentLexicon {
pub fn new() -> Self {
Self {
lexicon: HashMap::new(),
default_score: 0.0,
}
}
pub fn with_basiclexicon() -> Self {
let mut lexicon = HashMap::new();
let positive_words = [
("good", 1.0),
("great", 2.0),
("excellent", 3.0),
("amazing", 3.0),
("wonderful", 2.5),
("fantastic", 2.5),
("love", 2.0),
("like", 1.0),
("happy", 2.0),
("joy", 2.0),
("pleased", 1.5),
("satisfied", 1.0),
("positive", 1.0),
("perfect", 3.0),
("best", 2.5),
("awesome", 2.5),
("beautiful", 2.0),
("brilliant", 2.5),
("superb", 2.5),
("nice", 1.0),
("outstanding", 3.0),
("exceptional", 3.0),
("remarkable", 2.0),
("delightful", 2.5),
("impressive", 2.0),
("enjoy", 1.5),
("recommend", 1.5),
("better", 1.0),
("superior", 2.0),
("exciting", 2.0),
];
let negative_words = [
("bad", -1.0),
("terrible", -2.5),
("awful", -2.5),
("horrible", -3.0),
("hate", -2.5),
("dislike", -1.5),
("sad", -2.0),
("unhappy", -2.0),
("disappointed", -2.0),
("negative", -1.0),
("worst", -3.0),
("poor", -1.5),
("disgusting", -3.0),
("ugly", -2.0),
("nasty", -2.5),
("stupid", -2.0),
("pathetic", -2.5),
("failure", -2.0),
("fail", -2.0),
("sucks", -2.0),
("boring", -1.5),
("mediocre", -1.0),
("inferior", -2.0),
("lousy", -2.0),
("dreadful", -2.5),
("annoying", -1.5),
("frustrating", -2.0),
("disappointing", -2.0),
("terrible", -2.5),
("useless", -2.0),
];
for (word, score) in &positive_words {
lexicon.insert(word.to_string(), *score);
}
for (word, score) in &negative_words {
lexicon.insert(word.to_string(), *score);
}
Self {
lexicon,
default_score: 0.0,
}
}
pub fn add_word(&mut self, word: String, score: f64) {
self.lexicon.insert(word.to_lowercase(), score);
}
pub fn get_score(&self, word: &str) -> f64 {
self.lexicon
.get(&word.to_lowercase())
.copied()
.unwrap_or(self.default_score)
}
pub fn contains(&self, word: &str) -> bool {
self.lexicon.contains_key(&word.to_lowercase())
}
pub fn len(&self) -> usize {
self.lexicon.len()
}
pub fn is_empty(&self) -> bool {
self.lexicon.is_empty()
}
pub fn entries(&self) -> &HashMap<String, f64> {
&self.lexicon
}
}
impl Default for SentimentLexicon {
fn default() -> Self {
Self::new()
}
}
pub struct LexiconSentimentAnalyzer {
lexicon: SentimentLexicon,
tokenizer: Box<dyn Tokenizer + Send + Sync>,
negation_words: Vec<String>,
negation_window: usize,
}
impl LexiconSentimentAnalyzer {
pub fn new(lexicon: SentimentLexicon) -> Self {
let negation_words = vec![
"not".to_string(),
"no".to_string(),
"never".to_string(),
"neither".to_string(),
"nobody".to_string(),
"nothing".to_string(),
"nowhere".to_string(),
"n't".to_string(),
"cannot".to_string(),
"without".to_string(),
];
Self {
lexicon,
tokenizer: Box::new(WordTokenizer::default()),
negation_words,
negation_window: 3,
}
}
pub fn with_basiclexicon() -> Self {
Self::new(SentimentLexicon::with_basiclexicon())
}
pub fn with_tokenizer(mut self, tokenizer: Box<dyn Tokenizer + Send + Sync>) -> Self {
self.tokenizer = tokenizer;
self
}
pub fn analyze(&self, text: &str) -> Result<SentimentResult> {
let tokens = self.tokenizer.tokenize(text)?;
if tokens.is_empty() {
return Ok(SentimentResult {
sentiment: Sentiment::Neutral,
score: 0.0,
confidence: 0.0,
word_counts: SentimentWordCounts {
positive_words: 0,
negative_words: 0,
neutral_words: 0,
total_words: 0,
},
});
}
let mut total_score = 0.0;
let mut positive_count = 0;
let mut negative_count = 0;
let mut neutral_count = 0;
for (i, token) in tokens.iter().enumerate() {
let token_lower = token.to_lowercase();
let mut score = self.lexicon.get_score(&token_lower);
if score != 0.0 {
for j in 1..=self.negation_window.min(i) {
let prev_token = &tokens[i - j].to_lowercase();
if self.negation_words.contains(prev_token) {
score *= -1.0;
break;
}
}
}
total_score += score;
if score > 0.0 {
positive_count += 1;
} else if score < 0.0 {
negative_count += 1;
} else {
neutral_count += 1;
}
}
let total_words = tokens.len();
let sentiment = Sentiment::from_score(total_score);
let sentiment_words = positive_count + negative_count;
let confidence = if total_words > 0 {
(sentiment_words as f64 / total_words as f64).min(1.0)
} else {
0.0
};
Ok(SentimentResult {
sentiment,
score: total_score,
confidence,
word_counts: SentimentWordCounts {
positive_words: positive_count,
negative_words: negative_count,
neutral_words: neutral_count,
total_words,
},
})
}
pub fn analyze_batch(&self, texts: &[&str]) -> Result<Vec<SentimentResult>> {
texts.iter().map(|&text| self.analyze(text)).collect()
}
}
#[derive(Debug, Clone)]
pub struct SentimentRules {
intensifiers: HashMap<String, f64>,
diminishers: HashMap<String, f64>,
}
impl Default for SentimentRules {
fn default() -> Self {
let mut intensifiers = HashMap::new();
intensifiers.insert("very".to_string(), 1.5);
intensifiers.insert("extremely".to_string(), 2.0);
intensifiers.insert("incredibly".to_string(), 2.0);
intensifiers.insert("really".to_string(), 1.3);
intensifiers.insert("so".to_string(), 1.3);
intensifiers.insert("absolutely".to_string(), 2.0);
intensifiers.insert("truly".to_string(), 1.5);
intensifiers.insert("totally".to_string(), 1.5);
intensifiers.insert("utterly".to_string(), 1.8);
intensifiers.insert("remarkably".to_string(), 1.5);
let mut diminishers = HashMap::new();
diminishers.insert("somewhat".to_string(), 0.5);
diminishers.insert("slightly".to_string(), 0.5);
diminishers.insert("barely".to_string(), 0.3);
diminishers.insert("hardly".to_string(), 0.3);
diminishers.insert("a little".to_string(), 0.5);
diminishers.insert("kind of".to_string(), 0.5);
diminishers.insert("sort of".to_string(), 0.5);
diminishers.insert("marginally".to_string(), 0.4);
Self {
intensifiers,
diminishers,
}
}
}
impl SentimentRules {
pub fn apply(&self, tokens: &[String], basescores: &[f64]) -> Vec<f64> {
let mut modified_scores = basescores.to_vec();
for (i, score) in modified_scores.iter_mut().enumerate() {
if *score == 0.0 {
continue;
}
for j in 1..=2.min(i) {
let prev_token = &tokens[i - j].to_lowercase();
if let Some(&multiplier) = self.intensifiers.get(prev_token) {
*score *= multiplier;
break;
} else if let Some(&multiplier) = self.diminishers.get(prev_token) {
*score *= multiplier;
break;
}
}
}
modified_scores
}
}
pub struct RuleBasedSentimentAnalyzer {
base_analyzer: LexiconSentimentAnalyzer,
rules: SentimentRules,
}
impl RuleBasedSentimentAnalyzer {
pub fn new(lexicon: SentimentLexicon) -> Self {
Self {
base_analyzer: LexiconSentimentAnalyzer::new(lexicon),
rules: SentimentRules::default(),
}
}
pub fn with_basiclexicon() -> Self {
Self::new(SentimentLexicon::with_basiclexicon())
}
pub fn analyze(&self, text: &str) -> Result<SentimentResult> {
let tokens = self.base_analyzer.tokenizer.tokenize(text)?;
if tokens.is_empty() {
return self.base_analyzer.analyze(text);
}
let basescores: Vec<f64> = tokens
.iter()
.map(|token| self.base_analyzer.lexicon.get_score(token))
.collect();
let modified_scores = self.rules.apply(&tokens, &basescores);
let total_score: f64 = modified_scores.iter().sum();
let sentiment = Sentiment::from_score(total_score);
let mut positive_count = 0;
let mut negative_count = 0;
let mut neutral_count = 0;
for &score in &modified_scores {
if score > 0.0 {
positive_count += 1;
} else if score < 0.0 {
negative_count += 1;
} else {
neutral_count += 1;
}
}
let total_words = tokens.len();
let sentiment_words = positive_count + negative_count;
let confidence = if total_words > 0 {
(sentiment_words as f64 / total_words as f64).min(1.0)
} else {
0.0
};
Ok(SentimentResult {
sentiment,
score: total_score,
confidence,
word_counts: SentimentWordCounts {
positive_words: positive_count,
negative_words: negative_count,
neutral_words: neutral_count,
total_words,
},
})
}
}
#[derive(Debug, Clone)]
pub struct VaderResult {
pub positive: f64,
pub negative: f64,
pub neutral: f64,
pub compound: f64,
pub sentiment: Sentiment,
}
pub struct VaderSentimentAnalyzer {
lexicon: SentimentLexicon,
tokenizer: Box<dyn Tokenizer + Send + Sync>,
negation_words: Vec<String>,
intensifiers: HashMap<String, f64>,
diminishers: HashMap<String, f64>,
but_weight: f64,
caps_multiplier: f64,
exclamation_boost: f64,
question_reduction: f64,
}
impl VaderSentimentAnalyzer {
pub fn new() -> Self {
let mut intensifiers = HashMap::new();
intensifiers.insert("very".to_string(), 0.293);
intensifiers.insert("extremely".to_string(), 0.293);
intensifiers.insert("absolutely".to_string(), 0.293);
intensifiers.insert("incredibly".to_string(), 0.293);
intensifiers.insert("really".to_string(), 0.18);
intensifiers.insert("so".to_string(), 0.18);
intensifiers.insert("truly".to_string(), 0.18);
intensifiers.insert("totally".to_string(), 0.18);
intensifiers.insert("quite".to_string(), 0.1);
let mut diminishers = HashMap::new();
diminishers.insert("somewhat".to_string(), -0.1);
diminishers.insert("barely".to_string(), -0.2);
diminishers.insert("hardly".to_string(), -0.2);
diminishers.insert("slightly".to_string(), -0.1);
diminishers.insert("kind of".to_string(), -0.1);
diminishers.insert("sort of".to_string(), -0.1);
let negation_words = vec![
"not".to_string(),
"no".to_string(),
"never".to_string(),
"neither".to_string(),
"nobody".to_string(),
"nothing".to_string(),
"nowhere".to_string(),
"cannot".to_string(),
"without".to_string(),
"don't".to_string(),
"doesn't".to_string(),
"didn't".to_string(),
"isn't".to_string(),
"wasn't".to_string(),
"won't".to_string(),
"wouldn't".to_string(),
"shouldn't".to_string(),
"couldn't".to_string(),
"aren't".to_string(),
"weren't".to_string(),
];
Self {
lexicon: SentimentLexicon::with_basiclexicon(),
tokenizer: Box::new(WordTokenizer::default()),
negation_words,
intensifiers,
diminishers,
but_weight: 0.5,
caps_multiplier: 0.733,
exclamation_boost: 0.292,
question_reduction: 0.18,
}
}
pub fn with_lexicon(mut self, lexicon: SentimentLexicon) -> Self {
self.lexicon = lexicon;
self
}
pub fn analyze(&self, text: &str) -> Result<VaderResult> {
let tokens = self.tokenizer.tokenize(text)?;
if tokens.is_empty() {
return Ok(VaderResult {
positive: 0.0,
negative: 0.0,
neutral: 1.0,
compound: 0.0,
sentiment: Sentiment::Neutral,
});
}
let mut sentiments: Vec<f64> = Vec::with_capacity(tokens.len());
for (i, token) in tokens.iter().enumerate() {
let lower = token.to_lowercase();
let mut score = self.lexicon.get_score(&lower);
if score == 0.0 {
sentiments.push(0.0);
continue;
}
if token.len() > 1 && token.chars().all(|c| c.is_uppercase()) {
if score > 0.0 {
score += self.caps_multiplier;
} else {
score -= self.caps_multiplier;
}
}
for j in 1..=3.min(i) {
let prev = tokens[i - j].to_lowercase();
if let Some(&boost) = self.intensifiers.get(&prev) {
if score > 0.0 {
score += boost;
} else {
score -= boost;
}
break;
} else if let Some(&reduce) = self.diminishers.get(&prev) {
if score > 0.0 {
score += reduce; } else {
score -= reduce;
}
break;
}
}
let mut negated = false;
for j in 1..=3.min(i) {
let prev = tokens[i - j].to_lowercase();
if self.negation_words.contains(&prev) {
negated = true;
break;
}
}
if negated {
score *= -0.74; }
sentiments.push(score);
}
let mut but_idx = None;
for (i, token) in tokens.iter().enumerate() {
if token.to_lowercase() == "but" || token.to_lowercase() == "however" {
but_idx = Some(i);
}
}
if let Some(idx) = but_idx {
for (i, score) in sentiments.iter_mut().enumerate() {
if i < idx {
*score *= 1.0 - self.but_weight;
} else if i > idx {
*score *= 1.0 + self.but_weight;
}
}
}
let mut sum_scores: f64 = sentiments.iter().sum();
let excl_count = text.chars().filter(|&c| c == '!').count().min(4);
if excl_count > 0 {
sum_scores += excl_count as f64 * self.exclamation_boost * sum_scores.signum();
}
if text.trim_end().ends_with('?') {
sum_scores *= 1.0 - self.question_reduction;
}
let compound = self.normalize(sum_scores);
let mut pos_sum = 0.0;
let mut neg_sum = 0.0;
let mut neu_count = 0.0;
for &s in &sentiments {
if s > 0.0 {
pos_sum += s;
} else if s < 0.0 {
neg_sum += s;
} else {
neu_count += 1.0;
}
}
let total = pos_sum + neg_sum.abs() + neu_count;
let (positive, negative, neutral) = if total > 0.0 {
(
(pos_sum / total).abs(),
(neg_sum / total).abs(),
neu_count / total,
)
} else {
(0.0, 0.0, 1.0)
};
let sentiment = if compound >= 0.05 {
Sentiment::Positive
} else if compound <= -0.05 {
Sentiment::Negative
} else {
Sentiment::Neutral
};
Ok(VaderResult {
positive,
negative,
neutral,
compound,
sentiment,
})
}
fn normalize(&self, score: f64) -> f64 {
let alpha = 15.0; score / (score * score + alpha).sqrt()
}
pub fn analyze_batch(&self, texts: &[&str]) -> Result<Vec<VaderResult>> {
texts.iter().map(|&text| self.analyze(text)).collect()
}
}
impl Default for VaderSentimentAnalyzer {
fn default() -> Self {
Self::new()
}
}
pub struct NaiveBayesSentiment {
word_counts: HashMap<String, HashMap<String, f64>>,
class_word_totals: HashMap<String, f64>,
class_doc_counts: HashMap<String, usize>,
total_docs: usize,
vocabulary: HashMap<String, usize>,
alpha: f64,
tokenizer: Box<dyn Tokenizer + Send + Sync>,
}
impl std::fmt::Debug for NaiveBayesSentiment {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("NaiveBayesSentiment")
.field("total_docs", &self.total_docs)
.field("vocabulary_size", &self.vocabulary.len())
.field("alpha", &self.alpha)
.field("classes", &self.class_doc_counts.keys().collect::<Vec<_>>())
.finish()
}
}
impl NaiveBayesSentiment {
pub fn new() -> Self {
Self {
word_counts: HashMap::new(),
class_word_totals: HashMap::new(),
class_doc_counts: HashMap::new(),
total_docs: 0,
vocabulary: HashMap::new(),
alpha: 1.0,
tokenizer: Box::new(WordTokenizer::default()),
}
}
pub fn with_alpha(mut self, alpha: f64) -> Self {
self.alpha = alpha;
self
}
pub fn train(&mut self, texts: &[&str], labels: &[&str]) -> Result<()> {
if texts.len() != labels.len() {
return Err(TextError::InvalidInput(
"texts and labels must have the same length".into(),
));
}
if texts.is_empty() {
return Err(TextError::InvalidInput("No training data provided".into()));
}
for (text, &label) in texts.iter().zip(labels.iter()) {
let tokens = self.tokenizer.tokenize(text)?;
*self.class_doc_counts.entry(label.to_string()).or_insert(0) += 1;
self.total_docs += 1;
let class_words = self.word_counts.entry(label.to_string()).or_default();
for token in &tokens {
let lower = token.to_lowercase();
*class_words.entry(lower.clone()).or_insert(0.0) += 1.0;
*self
.class_word_totals
.entry(label.to_string())
.or_insert(0.0) += 1.0;
let vocab_len = self.vocabulary.len();
self.vocabulary.entry(lower).or_insert(vocab_len);
}
}
Ok(())
}
pub fn predict(&self, text: &str) -> Result<String> {
let (label, _) = self.predict_with_score(text)?;
Ok(label)
}
pub fn predict_with_score(&self, text: &str) -> Result<(String, f64)> {
if self.total_docs == 0 {
return Err(TextError::ModelNotFitted(
"Classifier not trained. Call train() first".into(),
));
}
let tokens = self.tokenizer.tokenize(text)?;
let vocab_size = self.vocabulary.len() as f64;
let mut best_label = String::new();
let mut best_score = f64::NEG_INFINITY;
for (label, &doc_count) in &self.class_doc_counts {
let log_prior = (doc_count as f64 / self.total_docs as f64).ln();
let class_words = self.word_counts.get(label);
let class_total = self.class_word_totals.get(label).copied().unwrap_or(0.0);
let mut log_likelihood = 0.0;
for token in &tokens {
let lower = token.to_lowercase();
let word_count = class_words
.and_then(|wc| wc.get(&lower))
.copied()
.unwrap_or(0.0);
let prob = (word_count + self.alpha) / (class_total + self.alpha * vocab_size);
log_likelihood += prob.ln();
}
let score = log_prior + log_likelihood;
if score > best_score {
best_score = score;
best_label = label.clone();
}
}
Ok((best_label, best_score))
}
pub fn predict_proba(&self, text: &str) -> Result<HashMap<String, f64>> {
if self.total_docs == 0 {
return Err(TextError::ModelNotFitted("Classifier not trained".into()));
}
let tokens = self.tokenizer.tokenize(text)?;
let vocab_size = self.vocabulary.len() as f64;
let mut log_scores: Vec<(String, f64)> = Vec::new();
for (label, &doc_count) in &self.class_doc_counts {
let log_prior = (doc_count as f64 / self.total_docs as f64).ln();
let class_words = self.word_counts.get(label);
let class_total = self.class_word_totals.get(label).copied().unwrap_or(0.0);
let mut log_likelihood = 0.0;
for token in &tokens {
let lower = token.to_lowercase();
let word_count = class_words
.and_then(|wc| wc.get(&lower))
.copied()
.unwrap_or(0.0);
let prob = (word_count + self.alpha) / (class_total + self.alpha * vocab_size);
log_likelihood += prob.ln();
}
log_scores.push((label.clone(), log_prior + log_likelihood));
}
let max_score = log_scores
.iter()
.map(|(_, s)| *s)
.fold(f64::NEG_INFINITY, f64::max);
let sum_exp: f64 = log_scores.iter().map(|(_, s)| (s - max_score).exp()).sum();
let mut probas = HashMap::new();
for (label, score) in &log_scores {
let prob = (score - max_score).exp() / sum_exp;
probas.insert(label.clone(), prob);
}
Ok(probas)
}
pub fn classes(&self) -> Vec<String> {
self.class_doc_counts.keys().cloned().collect()
}
}
impl Default for NaiveBayesSentiment {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone)]
pub struct AspectSentiment {
pub aspect: String,
pub sentiment: Sentiment,
pub score: f64,
pub context: String,
}
pub struct AspectSentimentAnalyzer {
lexicon: SentimentLexicon,
tokenizer: Box<dyn Tokenizer + Send + Sync>,
context_window: usize,
negation_words: Vec<String>,
}
impl AspectSentimentAnalyzer {
pub fn new() -> Self {
Self {
lexicon: SentimentLexicon::with_basiclexicon(),
tokenizer: Box::new(WordTokenizer::default()),
context_window: 5,
negation_words: vec![
"not".to_string(),
"no".to_string(),
"never".to_string(),
"n't".to_string(),
"without".to_string(),
],
}
}
pub fn with_lexicon(mut self, lexicon: SentimentLexicon) -> Self {
self.lexicon = lexicon;
self
}
pub fn with_context_window(mut self, window: usize) -> Self {
self.context_window = window;
self
}
pub fn analyze(&self, text: &str, aspects: &[&str]) -> Result<Vec<AspectSentiment>> {
let tokens = self.tokenizer.tokenize(text)?;
let lower_tokens: Vec<String> = tokens.iter().map(|t| t.to_lowercase()).collect();
let mut results = Vec::new();
for &aspect in aspects {
let aspect_lower = aspect.to_lowercase();
let aspect_tokens: Vec<String> =
aspect_lower.split_whitespace().map(String::from).collect();
for pos in 0..lower_tokens.len() {
let aspect_matches = if aspect_tokens.len() == 1 {
lower_tokens[pos] == aspect_tokens[0]
} else {
pos + aspect_tokens.len() <= lower_tokens.len()
&& aspect_tokens
.iter()
.enumerate()
.all(|(j, at)| lower_tokens[pos + j] == *at)
};
if !aspect_matches {
continue;
}
let discourse_markers = [
"but",
"however",
"although",
"yet",
"though",
"nevertheless",
];
let mut start = pos.saturating_sub(self.context_window);
let end = (pos + aspect_tokens.len() + self.context_window).min(lower_tokens.len());
let initial_start = start;
if let Some(last_marker_idx) = (initial_start..pos)
.rev()
.find(|&i| discourse_markers.contains(&lower_tokens[i].as_str()))
{
start = last_marker_idx + 1;
}
let mut effective_end = end;
for i in (pos + aspect_tokens.len())..end {
if discourse_markers.contains(&lower_tokens[i].as_str()) {
effective_end = i;
break;
}
}
let mut score = 0.0;
let mut is_negated = false;
for i in start..effective_end {
if i >= pos && i < pos + aspect_tokens.len() {
continue;
}
let token = &lower_tokens[i];
if self.negation_words.contains(token) {
is_negated = true;
continue;
}
let word_score = self.lexicon.get_score(token);
if word_score != 0.0 {
if is_negated {
score -= word_score;
is_negated = false;
} else {
score += word_score;
}
}
}
let context_tokens = &tokens[start..end];
let context = context_tokens.join(" ");
results.push(AspectSentiment {
aspect: aspect.to_string(),
sentiment: Sentiment::from_score(score),
score,
context,
});
}
}
Ok(results)
}
}
impl Default for AspectSentimentAnalyzer {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone)]
pub struct AggregatedSentiment {
pub mean_score: f64,
pub std_score: f64,
pub overall_sentiment: Sentiment,
pub positive_ratio: f64,
pub negative_ratio: f64,
pub neutral_ratio: f64,
pub count: usize,
pub results: Vec<SentimentResult>,
}
pub fn aggregate_sentiment(results: &[SentimentResult]) -> AggregatedSentiment {
if results.is_empty() {
return AggregatedSentiment {
mean_score: 0.0,
std_score: 0.0,
overall_sentiment: Sentiment::Neutral,
positive_ratio: 0.0,
negative_ratio: 0.0,
neutral_ratio: 0.0,
count: 0,
results: Vec::new(),
};
}
let n = results.len() as f64;
let sum: f64 = results.iter().map(|r| r.score).sum();
let mean_score = sum / n;
let variance: f64 = results
.iter()
.map(|r| (r.score - mean_score).powi(2))
.sum::<f64>()
/ n;
let std_score = variance.sqrt();
let mut pos = 0;
let mut neg = 0;
let mut neu = 0;
for r in results {
match r.sentiment {
Sentiment::Positive => pos += 1,
Sentiment::Negative => neg += 1,
Sentiment::Neutral => neu += 1,
}
}
AggregatedSentiment {
mean_score,
std_score,
overall_sentiment: Sentiment::from_score(mean_score),
positive_ratio: pos as f64 / n,
negative_ratio: neg as f64 / n,
neutral_ratio: neu as f64 / n,
count: results.len(),
results: results.to_vec(),
}
}
pub fn analyze_and_aggregate(texts: &[&str]) -> Result<AggregatedSentiment> {
let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();
let results = analyzer.analyze_batch(texts)?;
Ok(aggregate_sentiment(&results))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_sentimentlexicon() {
let mut lexicon = SentimentLexicon::new();
lexicon.add_word("happy".to_string(), 2.0);
lexicon.add_word("sad".to_string(), -2.0);
assert_eq!(lexicon.get_score("happy"), 2.0);
assert_eq!(lexicon.get_score("sad"), -2.0);
assert_eq!(lexicon.get_score("unknown"), 0.0);
}
#[test]
fn test_basic_sentiment_analysis() {
let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();
let positive_result = analyzer
.analyze("This is a wonderful day!")
.expect("Operation failed");
assert_eq!(positive_result.sentiment, Sentiment::Positive);
assert!(positive_result.score > 0.0);
let negative_result = analyzer
.analyze("This is terrible and awful")
.expect("Operation failed");
assert_eq!(negative_result.sentiment, Sentiment::Negative);
assert!(negative_result.score < 0.0);
let neutral_result = analyzer
.analyze("This is a book")
.expect("Operation failed");
assert_eq!(neutral_result.sentiment, Sentiment::Neutral);
}
#[test]
fn test_negation_handling() {
let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();
let negated_result = analyzer
.analyze("This is not good")
.expect("Operation failed");
assert_eq!(negated_result.sentiment, Sentiment::Negative);
assert!(negated_result.score < 0.0);
}
#[test]
fn test_rule_based_sentiment() {
let analyzer = RuleBasedSentimentAnalyzer::with_basiclexicon();
let intensified_result = analyzer
.analyze("This is very good")
.expect("Operation failed");
let normal_result = analyzer.analyze("This is good").expect("Operation failed");
assert!(intensified_result.score > normal_result.score);
}
#[test]
fn test_sentiment_batch_analysis() {
let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();
let texts = vec!["I love this", "I hate this", "This is okay"];
let results = analyzer.analyze_batch(&texts).expect("Operation failed");
assert_eq!(results.len(), 3);
assert_eq!(results[0].sentiment, Sentiment::Positive);
assert_eq!(results[1].sentiment, Sentiment::Negative);
}
#[test]
fn test_vader_positive() {
let vader = VaderSentimentAnalyzer::new();
let result = vader
.analyze("This movie is amazing and wonderful")
.expect("analyze");
assert_eq!(result.sentiment, Sentiment::Positive);
assert!(result.compound > 0.0);
}
#[test]
fn test_vader_negative() {
let vader = VaderSentimentAnalyzer::new();
let result = vader
.analyze("This movie is terrible and awful")
.expect("analyze");
assert_eq!(result.sentiment, Sentiment::Negative);
assert!(result.compound < 0.0);
}
#[test]
fn test_vader_neutral() {
let vader = VaderSentimentAnalyzer::new();
let result = vader.analyze("The sky is blue").expect("analyze");
assert_eq!(result.sentiment, Sentiment::Neutral);
}
#[test]
fn test_vader_negation() {
let vader = VaderSentimentAnalyzer::new();
let result = vader.analyze("This is not good at all").expect("analyze");
assert!(result.compound < 0.0, "Negated positive should be negative");
}
#[test]
fn test_vader_intensifier() {
let vader = VaderSentimentAnalyzer::new();
let base = vader.analyze("This is good").expect("analyze");
let intensified = vader.analyze("This is very good").expect("analyze");
assert!(
intensified.compound > base.compound,
"Intensified should score higher: {} vs {}",
intensified.compound,
base.compound
);
}
#[test]
fn test_vader_but_clause() {
let vader = VaderSentimentAnalyzer::new();
let result = vader
.analyze("The food was good but the service was terrible")
.expect("analyze");
assert!(result.compound < 0.0);
}
#[test]
fn test_vader_caps_emphasis() {
let vader = VaderSentimentAnalyzer::new();
let normal = vader.analyze("This is good").expect("analyze");
let caps = vader.analyze("This is GOOD").expect("analyze");
assert!(
caps.compound >= normal.compound,
"CAPS should score higher or equal"
);
}
#[test]
fn test_vader_batch() {
let vader = VaderSentimentAnalyzer::new();
let texts = vec!["I love this!", "I hate this!"];
let results = vader.analyze_batch(&texts).expect("batch");
assert_eq!(results.len(), 2);
assert_eq!(results[0].sentiment, Sentiment::Positive);
assert_eq!(results[1].sentiment, Sentiment::Negative);
}
#[test]
fn test_vader_compound_range() {
let vader = VaderSentimentAnalyzer::new();
let result = vader
.analyze("This is the most absolutely amazing incredible thing ever!!!")
.expect("analyze");
assert!(result.compound >= -1.0 && result.compound <= 1.0);
}
#[test]
fn test_naive_bayes_train_predict() {
let mut clf = NaiveBayesSentiment::new();
let texts = vec![
"I love this product it is amazing",
"Great quality excellent experience",
"Wonderful service very happy",
"This is terrible and awful",
"Horrible experience very bad",
"Worst product I have ever bought",
];
let labels = vec![
"positive", "positive", "positive", "negative", "negative", "negative",
];
clf.train(&texts, &labels).expect("training failed");
let pred = clf.predict("This is amazing and great").expect("predict");
assert_eq!(pred, "positive");
let pred = clf
.predict("This is terrible and horrible")
.expect("predict");
assert_eq!(pred, "negative");
}
#[test]
fn test_naive_bayes_predict_proba() {
let mut clf = NaiveBayesSentiment::new();
let texts = vec![
"good great excellent",
"good wonderful amazing",
"bad terrible awful",
"bad horrible disgusting",
];
let labels = vec!["positive", "positive", "negative", "negative"];
clf.train(&texts, &labels).expect("training failed");
let probas = clf.predict_proba("good excellent").expect("predict_proba");
assert!(probas.contains_key("positive"));
assert!(probas.contains_key("negative"));
let pos_prob = probas.get("positive").copied().unwrap_or(0.0);
let neg_prob = probas.get("negative").copied().unwrap_or(0.0);
assert!(pos_prob > neg_prob);
let total: f64 = probas.values().sum();
assert!((total - 1.0).abs() < 1e-6);
}
#[test]
fn test_naive_bayes_not_trained() {
let clf = NaiveBayesSentiment::new();
let result = clf.predict("test");
assert!(result.is_err());
}
#[test]
fn test_naive_bayes_classes() {
let mut clf = NaiveBayesSentiment::new();
let texts = vec!["a", "b", "c"];
let labels = vec!["pos", "neg", "pos"];
clf.train(&texts, &labels).expect("train");
let classes = clf.classes();
assert_eq!(classes.len(), 2);
}
#[test]
fn test_aspect_sentiment_basic() {
let analyzer = AspectSentimentAnalyzer::new();
let results = analyzer
.analyze(
"The food was excellent but the service was terrible",
&["food", "service"],
)
.expect("analyze");
assert_eq!(results.len(), 2);
let food_result = results.iter().find(|r| r.aspect == "food");
assert!(food_result.is_some());
let food = food_result.expect("food aspect");
assert_eq!(food.sentiment, Sentiment::Positive);
let service_result = results.iter().find(|r| r.aspect == "service");
assert!(service_result.is_some());
let service = service_result.expect("service aspect");
assert_eq!(service.sentiment, Sentiment::Negative);
}
#[test]
fn test_aspect_sentiment_negation() {
let analyzer = AspectSentimentAnalyzer::new();
let results = analyzer
.analyze("The price was not good", &["price"])
.expect("analyze");
assert!(!results.is_empty());
assert_eq!(results[0].sentiment, Sentiment::Negative);
}
#[test]
fn test_aspect_sentiment_no_match() {
let analyzer = AspectSentimentAnalyzer::new();
let results = analyzer
.analyze("The sky is blue", &["food", "service"])
.expect("analyze");
assert!(results.is_empty());
}
#[test]
fn test_aspect_with_custom_window() {
let analyzer = AspectSentimentAnalyzer::new().with_context_window(2);
let results = analyzer
.analyze("The food here is really great and beautiful", &["food"])
.expect("analyze");
assert!(!results.is_empty());
}
#[test]
fn test_aggregate_sentiment() {
let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();
let results = analyzer
.analyze_batch(&["I love this", "I love this too", "This is terrible"])
.expect("batch");
let agg = aggregate_sentiment(&results);
assert_eq!(agg.count, 3);
assert!(agg.mean_score > 0.0); assert!(agg.positive_ratio > 0.5);
assert!(agg.std_score > 0.0);
}
#[test]
fn test_aggregate_empty() {
let agg = aggregate_sentiment(&[]);
assert_eq!(agg.count, 0);
assert_eq!(agg.overall_sentiment, Sentiment::Neutral);
}
#[test]
fn test_analyze_and_aggregate() {
let texts = vec!["I love this product", "It is amazing", "Very good quality"];
let agg = analyze_and_aggregate(&texts).expect("aggregate");
assert_eq!(agg.count, 3);
assert!(agg.mean_score > 0.0);
assert_eq!(agg.overall_sentiment, Sentiment::Positive);
}
#[test]
fn test_sentiment_display() {
assert_eq!(format!("{}", Sentiment::Positive), "Positive");
assert_eq!(format!("{}", Sentiment::Negative), "Negative");
assert_eq!(format!("{}", Sentiment::Neutral), "Neutral");
}
#[test]
fn test_sentiment_from_score_thresholds() {
assert_eq!(Sentiment::from_score(0.1), Sentiment::Positive);
assert_eq!(Sentiment::from_score(-0.1), Sentiment::Negative);
assert_eq!(Sentiment::from_score(0.0), Sentiment::Neutral);
assert_eq!(Sentiment::from_score(0.03), Sentiment::Neutral);
}
}