use crate::AprenderError;
use std::collections::HashMap;
/// Sentiment class produced by [`SentimentAnalyzer::classify`].
///
/// The class is derived by comparing a text's score against the analyzer's
/// neutral threshold.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Polarity {
/// The score is strictly greater than the neutral threshold.
Positive,
/// The score is strictly less than the negated neutral threshold.
Negative,
/// Neither of the two comparisons above holds (the fallback case).
Neutral,
}
/// Lexicon-based sentiment analyzer.
///
/// Every known word carries a signed weight. The score of a text is the sum
/// of the weights of its tokens divided by the total number of tokens, and
/// the classification compares that score against a neutral threshold.
#[derive(Debug)]
pub struct SentimentAnalyzer {
    /// Lowercase word -> signed sentiment weight.
    lexicon: HashMap<String, f64>,
    /// Scores within `[-neutral_threshold, neutral_threshold]` classify as neutral.
    neutral_threshold: f64,
}

impl SentimentAnalyzer {
    /// Neutral threshold applied by [`Self::new`] and [`Self::with_lexicon`].
    const DEFAULT_NEUTRAL_THRESHOLD: f64 = 0.05;

    /// Creates an analyzer backed by the built-in English lexicon and the
    /// default neutral threshold (`0.05`).
    #[must_use]
    pub fn new() -> Self {
        Self {
            lexicon: Self::default_lexicon(),
            neutral_threshold: Self::DEFAULT_NEUTRAL_THRESHOLD,
        }
    }

    /// Creates an analyzer backed by a caller-supplied lexicon and the
    /// default neutral threshold (`0.05`).
    ///
    /// Keys are normalised to lowercase, because [`Self::score`] lowercases
    /// the text before looking tokens up; a key such as `"Good"` would
    /// otherwise never match anything. When several keys differ only by
    /// case, the key that is already lowercase wins; failing that, the key
    /// that sorts first (byte-wise) wins, so the outcome is deterministic.
    ///
    /// Keys containing whitespace or ASCII punctuation can never match,
    /// since [`Self::score`] splits the text on those characters.
    #[must_use]
    pub fn with_lexicon(lexicon: HashMap<String, f64>) -> Self {
        // Sort so that collisions between case variants do not depend on the
        // (randomised) iteration order of the incoming map.
        let mut entries: Vec<(String, f64)> = lexicon.into_iter().collect();
        entries.sort_unstable_by(|a, b| a.0.cmp(&b.0));

        let mut normalized = HashMap::with_capacity(entries.len());
        for (word, weight) in entries {
            let lowered = word.to_lowercase();
            if lowered == word {
                // An exact lowercase key always overrides its case variants.
                normalized.insert(lowered, weight);
            } else {
                normalized.entry(lowered).or_insert(weight);
            }
        }

        Self {
            lexicon: normalized,
            neutral_threshold: Self::DEFAULT_NEUTRAL_THRESHOLD,
        }
    }

    /// Builder-style setter for the neutral threshold.
    ///
    /// The value is stored as given, without validation. With a negative
    /// threshold the "positive" comparison in [`Self::classify`] is checked
    /// first, so a score of zero classifies as positive; with a NaN
    /// threshold every text classifies as neutral.
    #[must_use]
    pub fn with_neutral_threshold(mut self, threshold: f64) -> Self {
        self.neutral_threshold = threshold;
        self
    }

    /// Computes the normalised sentiment score of `text`.
    ///
    /// The text is lowercased and split on whitespace and ASCII punctuation;
    /// empty fragments are discarded. The score is the sum of the lexicon
    /// weights of all tokens divided by the total token count, so words that
    /// are missing from the lexicon dilute the score. Text without any token
    /// (empty, or consisting only of separators) scores `0.0`.
    ///
    /// Negators and intensifiers in the built-in lexicon are plain additive
    /// weights; they do not modify neighbouring words.
    ///
    /// # Errors
    ///
    /// The current implementation has no failing path and always returns
    /// `Ok`.
    pub fn score(&self, text: &str) -> Result<f64, AprenderError> {
        let lowered = text.to_lowercase();

        // Single pass over borrowed slices: count every token while summing
        // the weights of those present in the lexicon.
        let mut token_count: usize = 0;
        let total_score: f64 = lowered
            .split(|c: char| c.is_whitespace() || c.is_ascii_punctuation())
            .filter(|token| !token.is_empty())
            .inspect(|_| token_count += 1)
            .filter_map(|token| self.lexicon.get(token))
            .sum();

        if token_count == 0 {
            return Ok(0.0);
        }

        // The cast only loses precision beyond 2^53 tokens.
        Ok(total_score / token_count as f64)
    }

    /// Classifies `text` by comparing its score with the neutral threshold.
    ///
    /// Returns [`Polarity::Positive`] when the score is strictly above the
    /// threshold, [`Polarity::Negative`] when it is strictly below the
    /// negated threshold, and [`Polarity::Neutral`] otherwise.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`Self::score`].
    pub fn classify(&self, text: &str) -> Result<Polarity, AprenderError> {
        let score = self.score(text)?;
        if score > self.neutral_threshold {
            Ok(Polarity::Positive)
        } else if score < -self.neutral_threshold {
            Ok(Polarity::Negative)
        } else {
            Ok(Polarity::Neutral)
        }
    }

    /// Returns the number of entries in the lexicon.
    #[must_use]
    pub fn lexicon_size(&self) -> usize {
        self.lexicon.len()
    }

    /// Builds the built-in English lexicon.
    ///
    /// Sentiment words are grouped into strong (±3.0), moderate (±2.0) and
    /// mild (±1.0) tiers; a few intensifiers and negators carry individual
    /// weights.
    fn default_lexicon() -> HashMap<String, f64> {
        const POSITIVE_STRONG: &[&str] = &[
            "excellent",
            "amazing",
            "wonderful",
            "fantastic",
            "outstanding",
            "superb",
            "brilliant",
            "perfect",
            "love",
            "loved",
        ];
        const POSITIVE_MODERATE: &[&str] = &[
            "good",
            "great",
            "nice",
            "fine",
            "happy",
            "glad",
            "pleased",
            "enjoy",
            "like",
            "better",
            "best",
            "positive",
            "beautiful",
            "awesome",
        ];
        const POSITIVE_MILD: &[&str] = &["ok", "okay", "decent", "fair", "acceptable", "well"];
        const NEGATIVE_STRONG: &[&str] = &[
            "terrible",
            "awful",
            "horrible",
            "disgusting",
            "worst",
            "hate",
            "hated",
            "dreadful",
            "atrocious",
            "pathetic",
        ];
        const NEGATIVE_MODERATE: &[&str] = &[
            "bad",
            "poor",
            "disappointing",
            "sad",
            "unhappy",
            "angry",
            "upset",
            "annoying",
            "worse",
            "negative",
            "ugly",
            "boring",
        ];
        const NEGATIVE_MILD: &[&str] = &["mediocre", "meh", "dull", "weak", "minor", "slight"];

        // Word tiers paired with the weight shared by every word in the tier.
        const TIERS: [(&[&str], f64); 6] = [
            (POSITIVE_STRONG, 3.0),
            (POSITIVE_MODERATE, 2.0),
            (POSITIVE_MILD, 1.0),
            (NEGATIVE_STRONG, -3.0),
            (NEGATIVE_MODERATE, -2.0),
            (NEGATIVE_MILD, -1.0),
        ];

        // Intensifiers and negators, each with its own weight.
        const MODIFIERS: [(&str, f64); 7] = [
            ("very", 1.5),
            ("really", 1.5),
            ("extremely", 2.0),
            ("absolutely", 2.0),
            ("not", -1.0),
            ("no", -1.0),
            ("never", -1.5),
        ];

        let capacity =
            TIERS.iter().map(|(words, _)| words.len()).sum::<usize>() + MODIFIERS.len();
        let mut lexicon = HashMap::with_capacity(capacity);

        for (words, weight) in TIERS.iter() {
            lexicon.extend(words.iter().map(|word| ((*word).to_string(), *weight)));
        }
        lexicon.extend(
            MODIFIERS
                .iter()
                .map(|(word, weight)| ((*word).to_string(), *weight)),
        );

        lexicon
    }
}
impl Default for SentimentAnalyzer {
fn default() -> Self {
Self::new()
}
}
// Test module loaded from `sentiment_tests.rs`; compiled only in test builds.
#[cfg(test)]
#[path = "sentiment_tests.rs"]
mod tests;
// Second test module, loaded from `sentiment_contract_falsify.rs`; also
// compiled only in test builds.
#[cfg(test)]
#[path = "sentiment_contract_falsify.rs"]
mod sentiment_contract_falsify;