//! Lexicon- and rule-based sentiment analysis.

use crate::error::Result;
use crate::tokenize::{Tokenizer, WordTokenizer};
use std::collections::HashMap;

/// Polarity label assigned to a piece of text.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Sentiment {
    /// Overall positive polarity.
    Positive,
    /// Overall negative polarity.
    Negative,
    /// No clear polarity either way.
    Neutral,
}

impl std::fmt::Display for Sentiment {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Sentiment::Positive => write!(f, "Positive"),
            Sentiment::Negative => write!(f, "Negative"),
            Sentiment::Neutral => write!(f, "Neutral"),
        }
    }
}

impl Sentiment {
    /// Maps the label to a signed numeric score: +1.0, 0.0, or -1.0.
    pub fn to_score(&self) -> f64 {
        match self {
            Sentiment::Positive => 1.0,
            Sentiment::Neutral => 0.0,
            Sentiment::Negative => -1.0,
        }
    }

    /// Classifies a numeric score by its sign; exactly 0.0 is treated as neutral.
    pub fn from_score(score: f64) -> Self {
        if score > 0.0 {
            Sentiment::Positive
        } else if score < 0.0 {
            Sentiment::Negative
        } else {
            Sentiment::Neutral
        }
    }
}

/// Outcome of analyzing a single piece of text.
#[derive(Debug, Clone)]
pub struct SentimentResult {
    /// Overall polarity label derived from `score`.
    pub sentiment: Sentiment,
    /// Sum of the (possibly negated or rule-adjusted) word scores.
    pub score: f64,
    /// Fraction of tokens that carried any sentiment, in `[0.0, 1.0]`.
    pub confidence: f64,
    /// Per-category token counts for the analyzed text.
    pub word_counts: SentimentWordCounts,
}

/// Counts of positive, negative, and neutral tokens in a text.
#[derive(Debug, Clone, Default)]
pub struct SentimentWordCounts {
    pub positive_words: usize,
    pub negative_words: usize,
    pub neutral_words: usize,
    pub total_words: usize,
}

/// Word-to-score mapping used by the analyzers.
#[derive(Debug, Clone)]
pub struct SentimentLexicon {
    lexicon: HashMap<String, f64>,
    default_score: f64,
}

impl SentimentLexicon {
    /// Creates an empty lexicon; unknown words score 0.0.
    pub fn new() -> Self {
        Self {
            lexicon: HashMap::new(),
            default_score: 0.0,
        }
    }

    /// Creates a lexicon pre-populated with a small built-in set of common
    /// English sentiment words and hand-assigned scores.
    pub fn with_basiclexicon() -> Self {
        let mut lexicon = HashMap::new();

        let positive_words = [
            ("good", 1.0),
            ("great", 2.0),
            ("excellent", 3.0),
            ("amazing", 3.0),
            ("wonderful", 2.5),
            ("fantastic", 2.5),
            ("love", 2.0),
            ("like", 1.0),
            ("happy", 2.0),
            ("joy", 2.0),
            ("pleased", 1.5),
            ("satisfied", 1.0),
            ("positive", 1.0),
            ("perfect", 3.0),
            ("best", 2.5),
            ("awesome", 2.5),
            ("beautiful", 2.0),
            ("brilliant", 2.5),
            ("superb", 2.5),
            ("nice", 1.0),
        ];

        let negative_words = [
            ("bad", -1.0),
            ("terrible", -2.5),
            ("awful", -2.5),
            ("horrible", -3.0),
            ("hate", -2.5),
            ("dislike", -1.5),
            ("sad", -2.0),
            ("unhappy", -2.0),
            ("disappointed", -2.0),
            ("negative", -1.0),
            ("worst", -3.0),
            ("poor", -1.5),
            ("disgusting", -3.0),
            ("ugly", -2.0),
            ("nasty", -2.5),
            ("stupid", -2.0),
            ("pathetic", -2.5),
            ("failure", -2.0),
            ("fail", -2.0),
            ("sucks", -2.0),
        ];

        for (word, score) in &positive_words {
            lexicon.insert(word.to_string(), *score);
        }

        for (word, score) in &negative_words {
            lexicon.insert(word.to_string(), *score);
        }

        Self {
            lexicon,
            default_score: 0.0,
        }
    }

    /// Adds a word (stored lowercased) with the given score, replacing any
    /// previous entry.
    pub fn add_word(&mut self, word: String, score: f64) {
        self.lexicon.insert(word.to_lowercase(), score);
    }

    /// Returns the score for a word (case-insensitive), or the default score
    /// if the word is not in the lexicon.
    pub fn get_score(&self, word: &str) -> f64 {
        self.lexicon
            .get(&word.to_lowercase())
            .copied()
            .unwrap_or(self.default_score)
    }

    /// Returns true if the word (case-insensitive) is in the lexicon.
    pub fn contains(&self, word: &str) -> bool {
        self.lexicon.contains_key(&word.to_lowercase())
    }

    /// Number of entries in the lexicon.
    pub fn len(&self) -> usize {
        self.lexicon.len()
    }

    /// Returns true if the lexicon has no entries.
    pub fn is_empty(&self) -> bool {
        self.lexicon.is_empty()
    }
}

impl Default for SentimentLexicon {
    fn default() -> Self {
        Self::new()
    }
}

/// Sentiment analyzer that sums lexicon scores over tokens and flips the sign
/// of a score when a negation word appears shortly before it.
pub struct LexiconSentimentAnalyzer {
    lexicon: SentimentLexicon,
    tokenizer: Box<dyn Tokenizer + Send + Sync>,
    negation_words: Vec<String>,
    negation_window: usize,
}

impl LexiconSentimentAnalyzer {
    /// Creates an analyzer with the given lexicon, a default word tokenizer,
    /// a small built-in set of negation words, and a negation window of 3 tokens.
    pub fn new(lexicon: SentimentLexicon) -> Self {
        let negation_words = vec![
            "not".to_string(),
            "no".to_string(),
            "never".to_string(),
            "neither".to_string(),
            "nobody".to_string(),
            "nothing".to_string(),
            "nowhere".to_string(),
            "n't".to_string(),
            "cannot".to_string(),
            "without".to_string(),
        ];

        Self {
            lexicon,
            tokenizer: Box::new(WordTokenizer::default()),
            negation_words,
            negation_window: 3,
        }
    }

    /// Creates an analyzer backed by the built-in basic lexicon.
    pub fn with_basiclexicon() -> Self {
        Self::new(SentimentLexicon::with_basiclexicon())
    }

    /// Replaces the tokenizer used to split input text.
    pub fn with_tokenizer(mut self, tokenizer: Box<dyn Tokenizer + Send + Sync>) -> Self {
        self.tokenizer = tokenizer;
        self
    }

    /// Analyzes a single text and returns its sentiment, score, confidence,
    /// and word counts. Empty input yields a neutral result with zero confidence.
    pub fn analyze(&self, text: &str) -> Result<SentimentResult> {
        let tokens = self.tokenizer.tokenize(text)?;

        if tokens.is_empty() {
            return Ok(SentimentResult {
                sentiment: Sentiment::Neutral,
                score: 0.0,
                confidence: 0.0,
                word_counts: SentimentWordCounts {
                    positive_words: 0,
                    negative_words: 0,
                    neutral_words: 0,
                    total_words: 0,
                },
            });
        }

        let mut total_score = 0.0;
        let mut positive_count = 0;
        let mut negative_count = 0;
        let mut neutral_count = 0;

        for (i, token) in tokens.iter().enumerate() {
            let token_lower = token.to_lowercase();
            let mut score = self.lexicon.get_score(&token_lower);

            // Flip the score if a negation word occurs within the preceding
            // `negation_window` tokens.
            if score != 0.0 {
                for j in 1..=self.negation_window.min(i) {
                    let prev_token = &tokens[i - j].to_lowercase();
                    if self.negation_words.contains(prev_token) {
                        score *= -1.0;
                        break;
                    }
                }
            }

            total_score += score;

            if score > 0.0 {
                positive_count += 1;
            } else if score < 0.0 {
                negative_count += 1;
            } else {
                neutral_count += 1;
            }
        }

        let total_words = tokens.len();
        let sentiment = Sentiment::from_score(total_score);

        // Confidence is the share of tokens that carried any sentiment.
        let sentiment_words = positive_count + negative_count;
        let confidence = if total_words > 0 {
            (sentiment_words as f64 / total_words as f64).min(1.0)
        } else {
            0.0
        };

        Ok(SentimentResult {
            sentiment,
            score: total_score,
            confidence,
            word_counts: SentimentWordCounts {
                positive_words: positive_count,
                negative_words: negative_count,
                neutral_words: neutral_count,
                total_words,
            },
        })
    }

    /// Analyzes each text in the slice, stopping at the first error.
    pub fn analyze_batch(&self, texts: &[&str]) -> Result<Vec<SentimentResult>> {
        texts.iter().map(|&text| self.analyze(text)).collect()
    }
}

/// Multipliers applied to sentiment words that follow an intensifier
/// (e.g. "very") or a diminisher (e.g. "slightly").
#[derive(Debug, Clone)]
pub struct SentimentRules {
    intensifiers: HashMap<String, f64>,
    diminishers: HashMap<String, f64>,
}

333
334impl Default for SentimentRules {
335 fn default() -> Self {
336 let mut intensifiers = HashMap::new();
337 intensifiers.insert("very".to_string(), 1.5);
338 intensifiers.insert("extremely".to_string(), 2.0);
339 intensifiers.insert("incredibly".to_string(), 2.0);
340 intensifiers.insert("really".to_string(), 1.3);
341 intensifiers.insert("so".to_string(), 1.3);
342 intensifiers.insert("absolutely".to_string(), 2.0);
343
344 let mut diminishers = HashMap::new();
345 diminishers.insert("somewhat".to_string(), 0.5);
346 diminishers.insert("slightly".to_string(), 0.5);
347 diminishers.insert("barely".to_string(), 0.3);
348 diminishers.insert("hardly".to_string(), 0.3);
349 diminishers.insert("a little".to_string(), 0.5);
350
351 Self {
352 intensifiers,
353 diminishers,
354 }
355 }
356}
357
impl SentimentRules {
    /// Scales each non-zero base score by the intensifier or diminisher
    /// multiplier of the nearest such word within the two preceding tokens.
    pub fn apply(&self, tokens: &[String], basescores: &[f64]) -> Vec<f64> {
        let mut modified_scores = basescores.to_vec();

        for (i, score) in modified_scores.iter_mut().enumerate() {
            if *score == 0.0 {
                continue;
            }

            // Look back at most two tokens for an intensifier or diminisher.
            for j in 1..=2.min(i) {
                let prev_token = &tokens[i - j].to_lowercase();

                if let Some(&multiplier) = self.intensifiers.get(prev_token) {
                    *score *= multiplier;
                    break;
                } else if let Some(&multiplier) = self.diminishers.get(prev_token) {
                    *score *= multiplier;
                    break;
                }
            }
        }

        modified_scores
    }
}

/// Sentiment analyzer that combines lexicon scores with intensifier and
/// diminisher rules.
pub struct RuleBasedSentimentAnalyzer {
    base_analyzer: LexiconSentimentAnalyzer,
    rules: SentimentRules,
}

impl RuleBasedSentimentAnalyzer {
    /// Creates a rule-based analyzer over the given lexicon with the default rules.
    pub fn new(lexicon: SentimentLexicon) -> Self {
        Self {
            base_analyzer: LexiconSentimentAnalyzer::new(lexicon),
            rules: SentimentRules::default(),
        }
    }

    /// Creates a rule-based analyzer backed by the built-in basic lexicon.
    pub fn with_basiclexicon() -> Self {
        Self::new(SentimentLexicon::with_basiclexicon())
    }

    /// Analyzes a text by scoring tokens against the lexicon and then applying
    /// the intensifier/diminisher rules. Negation handling is not applied here.
    pub fn analyze(&self, text: &str) -> Result<SentimentResult> {
        let tokens = self.base_analyzer.tokenizer.tokenize(text)?;

        if tokens.is_empty() {
            return self.base_analyzer.analyze(text);
        }

        let basescores: Vec<f64> = tokens
            .iter()
            .map(|token| self.base_analyzer.lexicon.get_score(token))
            .collect();

        let modified_scores = self.rules.apply(&tokens, &basescores);

        let total_score: f64 = modified_scores.iter().sum();
        let sentiment = Sentiment::from_score(total_score);

        let mut positive_count = 0;
        let mut negative_count = 0;
        let mut neutral_count = 0;

        for &score in &modified_scores {
            if score > 0.0 {
                positive_count += 1;
            } else if score < 0.0 {
                negative_count += 1;
            } else {
                neutral_count += 1;
            }
        }

        let total_words = tokens.len();
        let sentiment_words = positive_count + negative_count;
        let confidence = if total_words > 0 {
            (sentiment_words as f64 / total_words as f64).min(1.0)
        } else {
            0.0
        };

        Ok(SentimentResult {
            sentiment,
            score: total_score,
            confidence,
            word_counts: SentimentWordCounts {
                positive_words: positive_count,
                negative_words: negative_count,
                neutral_words: neutral_count,
                total_words,
            },
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sentimentlexicon() {
        let mut lexicon = SentimentLexicon::new();
        lexicon.add_word("happy".to_string(), 2.0);
        lexicon.add_word("sad".to_string(), -2.0);

        assert_eq!(lexicon.get_score("happy"), 2.0);
        assert_eq!(lexicon.get_score("sad"), -2.0);
        assert_eq!(lexicon.get_score("unknown"), 0.0);
    }
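
    // Added sketch (not part of the original tests): checks the score
    // conversions and Display output of `Sentiment` as implemented above.
    #[test]
    fn test_sentiment_score_conversion() {
        assert_eq!(Sentiment::Positive.to_score(), 1.0);
        assert_eq!(Sentiment::Neutral.to_score(), 0.0);
        assert_eq!(Sentiment::Negative.to_score(), -1.0);

        assert_eq!(Sentiment::from_score(2.5), Sentiment::Positive);
        assert_eq!(Sentiment::from_score(-0.5), Sentiment::Negative);
        assert_eq!(Sentiment::from_score(0.0), Sentiment::Neutral);

        assert_eq!(Sentiment::Positive.to_string(), "Positive");
    }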

    #[test]
    fn test_basic_sentiment_analysis() {
        let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();

        let positive_result = analyzer.analyze("This is a wonderful day!").unwrap();
        assert_eq!(positive_result.sentiment, Sentiment::Positive);
        assert!(positive_result.score > 0.0);

        let negative_result = analyzer.analyze("This is terrible and awful").unwrap();
        assert_eq!(negative_result.sentiment, Sentiment::Negative);
        assert!(negative_result.score < 0.0);

        let neutral_result = analyzer.analyze("This is a book").unwrap();
        assert_eq!(neutral_result.sentiment, Sentiment::Neutral);
        assert_eq!(neutral_result.score, 0.0);
    }
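
    // Added sketch (not part of the original tests): exercises the confidence
    // calculation, assuming the default `WordTokenizer` splits this two-word
    // input into exactly two tokens, one positive and one negative.
    #[test]
    fn test_confidence_counts_sentiment_words() {
        let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();

        let result = analyzer.analyze("good bad").unwrap();
        assert_eq!(result.word_counts.total_words, 2);
        assert_eq!(result.word_counts.positive_words, 1);
        assert_eq!(result.word_counts.negative_words, 1);
        // Both tokens carry sentiment, so confidence should be 2 / 2 = 1.0.
        assert!((result.confidence - 1.0).abs() < 1e-9);
    }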

    #[test]
    fn test_negation_handling() {
        let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();

        let negated_result = analyzer.analyze("This is not good").unwrap();
        assert_eq!(negated_result.sentiment, Sentiment::Negative);
        assert!(negated_result.score < 0.0);
    }
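
    // Added sketch (not part of the original tests): the negation word does not
    // have to be directly adjacent, only within the 3-token negation window;
    // assumes the default `WordTokenizer` splits this phrase on whitespace.
    #[test]
    fn test_negation_within_window() {
        let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();

        // "not" is two tokens before "good", still inside the window of 3.
        let result = analyzer.analyze("not a good idea").unwrap();
        assert_eq!(result.sentiment, Sentiment::Negative);
        assert!(result.score < 0.0);
    }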

    #[test]
    fn test_rule_based_sentiment() {
        let analyzer = RuleBasedSentimentAnalyzer::with_basiclexicon();

        let intensified_result = analyzer.analyze("This is very good").unwrap();
        let normal_result = analyzer.analyze("This is good").unwrap();

        assert!(intensified_result.score > normal_result.score);
    }
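
    // Added sketch (not part of the original tests): the mirror case of the
    // intensifier test above, using a diminisher from the default rules.
    #[test]
    fn test_rule_based_diminisher() {
        let analyzer = RuleBasedSentimentAnalyzer::with_basiclexicon();

        let diminished_result = analyzer.analyze("This is slightly good").unwrap();
        let normal_result = analyzer.analyze("This is good").unwrap();

        assert!(diminished_result.score < normal_result.score);
        assert_eq!(diminished_result.sentiment, Sentiment::Positive);
    }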

    #[test]
    fn test_sentiment_batch_analysis() {
        let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();
        let texts = vec!["I love this", "I hate this", "This is okay"];

        let results = analyzer.analyze_batch(&texts).unwrap();
        assert_eq!(results.len(), 3);
        assert_eq!(results[0].sentiment, Sentiment::Positive);
        assert_eq!(results[1].sentiment, Sentiment::Negative);
    }
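
    // Added sketch (not part of the original tests): empty input should take the
    // early-return path in `analyze`, assuming the tokenizer yields no tokens
    // for an empty string.
    #[test]
    fn test_empty_input_is_neutral() {
        let analyzer = LexiconSentimentAnalyzer::with_basiclexicon();

        let result = analyzer.analyze("").unwrap();
        assert_eq!(result.sentiment, Sentiment::Neutral);
        assert_eq!(result.score, 0.0);
        assert_eq!(result.confidence, 0.0);
        assert_eq!(result.word_counts.total_words, 0);
    }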
}