Skip to main content

tuitbot_core/scoring/
mod.rs

//! Tweet scoring engine for reply-worthiness evaluation.
//!
//! Combines six independent signals (keyword relevance, follower score,
//! recency, engagement rate, reply count, content type) into a total score
//! (0-100) with a configurable threshold for the REPLY/SKIP verdict.
//!
//! All scoring is purely heuristic -- no LLM calls.
8
9pub mod signals;
10
11use crate::config::ScoringConfig;
12use chrono::{DateTime, Utc};
13
/// Source-agnostic description of a tweet, as consumed by the scoring engine.
///
/// Keeping this separate from any concrete API response type lets the engine
/// score tweets coming from any data source.
#[derive(Debug, Clone)]
pub struct TweetData {
    /// The tweet text content.
    pub text: String,
    /// ISO-8601 timestamp of when the tweet was created.
    pub created_at: String,
    /// Number of likes on the tweet.
    pub likes: u64,
    /// Number of retweets.
    pub retweets: u64,
    /// Number of replies.
    pub replies: u64,
    /// Author's username (for display).
    pub author_username: String,
    /// Author's follower count.
    pub author_followers: u64,
    /// Whether the tweet has attached media (images, video, etc.).
    ///
    /// Read by the content-type signal and by the score breakdown.
    pub has_media: bool,
    /// Whether the tweet is a quote tweet.
    ///
    /// Read by the content-type signal and by the score breakdown.
    pub is_quote_tweet: bool,
}
41
/// Result of scoring one tweet: the clamped total plus each signal's share.
#[derive(Debug, Clone)]
pub struct TweetScore {
    /// Sum of all signal scores, clamped to the 0-100 range.
    pub total: f32,
    /// Points awarded by the keyword relevance signal.
    pub keyword_relevance: f32,
    /// Points awarded by the author follower count signal.
    pub follower: f32,
    /// Points awarded by the tweet recency signal.
    pub recency: f32,
    /// Points awarded by the engagement rate signal.
    pub engagement: f32,
    /// Points awarded by the reply count signal (fewer replies = higher).
    pub reply_count: f32,
    /// Points awarded by the content type signal (text-only = max).
    pub content_type: f32,
    /// `true` when `total` reaches the configured threshold.
    pub meets_threshold: bool,
}
62
63/// Scoring engine that combines all signals into a unified score.
64pub struct ScoringEngine {
65    config: ScoringConfig,
66    keywords: Vec<String>,
67}
68
69impl ScoringEngine {
70    /// Create a new scoring engine with the given config and keywords.
71    ///
72    /// Keywords should be the combined list of `product_keywords` and
73    /// `competitor_keywords` from the business profile.
74    pub fn new(config: ScoringConfig, keywords: Vec<String>) -> Self {
75        Self { config, keywords }
76    }
77
78    /// Score a tweet using all four signals.
79    ///
80    /// Uses the current time for recency scoring.
81    pub fn score_tweet(&self, tweet: &TweetData) -> TweetScore {
82        self.score_tweet_at(tweet, Utc::now())
83    }
84
85    /// Score a tweet using all six signals with a specific time reference.
86    ///
87    /// Accepts `now` for deterministic testing.
88    pub fn score_tweet_at(&self, tweet: &TweetData, now: DateTime<Utc>) -> TweetScore {
89        let keyword_relevance = signals::keyword_relevance(
90            &tweet.text,
91            &self.keywords,
92            self.config.keyword_relevance_max,
93        );
94
95        let follower = signals::targeted_follower_score(
96            tweet.author_followers,
97            self.config.follower_count_max,
98        );
99
100        let recency = signals::recency_score_at(&tweet.created_at, self.config.recency_max, now);
101
102        let engagement = signals::engagement_rate(
103            tweet.likes,
104            tweet.retweets,
105            tweet.replies,
106            tweet.author_followers,
107            self.config.engagement_rate_max,
108        );
109
110        let reply_count = signals::reply_count_score(tweet.replies, self.config.reply_count_max);
111
112        let content_type = signals::content_type_score(
113            tweet.has_media,
114            tweet.is_quote_tweet,
115            self.config.content_type_max,
116        );
117
118        let total =
119            (keyword_relevance + follower + recency + engagement + reply_count + content_type)
120                .clamp(0.0, 100.0);
121        let meets_threshold = total >= self.config.threshold as f32;
122
123        tracing::debug!(
124            author = %tweet.author_username,
125            total = format!("{:.0}", total),
126            keyword = format!("{:.0}", keyword_relevance),
127            follower = format!("{:.0}", follower),
128            recency = format!("{:.0}", recency),
129            engagement = format!("{:.0}", engagement),
130            reply = format!("{:.0}", reply_count),
131            content = format!("{:.0}", content_type),
132            meets = meets_threshold,
133            "Scored tweet",
134        );
135
136        TweetScore {
137            total,
138            keyword_relevance,
139            follower,
140            recency,
141            engagement,
142            reply_count,
143            content_type,
144            meets_threshold,
145        }
146    }
147
148    /// Return the configured keywords.
149    pub fn keywords(&self) -> &[String] {
150        &self.keywords
151    }
152
153    /// Return the scoring configuration.
154    pub fn config(&self) -> &ScoringConfig {
155        &self.config
156    }
157}
158
/// Collect the keywords that occur in a tweet, ignoring case.
///
/// Both the tweet text and each keyword are lowercased before a substring
/// check. Matching keywords are returned in their original spelling and in
/// the order they appear in `keywords`. This is meant for display only --
/// the actual scoring uses weighted counts.
pub fn find_matched_keywords(tweet_text: &str, keywords: &[String]) -> Vec<String> {
    let haystack = tweet_text.to_lowercase();
    let mut hits = Vec::new();
    for keyword in keywords {
        let needle = keyword.to_lowercase();
        if haystack.contains(needle.as_str()) {
            hits.push(keyword.clone());
        }
    }
    hits
}
171
/// Format a follower count for display.
///
/// Counts below 1,000 are printed as-is; larger counts are abbreviated to one
/// decimal place with a `K` (thousands) or `M` (millions) suffix.
///
/// Examples: 500 -> "500", 1200 -> "1.2K", 45300 -> "45.3K",
/// 999999 -> "1.0M", 1200000 -> "1.2M".
pub fn format_follower_count(count: u64) -> String {
    // 999_950 is the lowest count whose one-decimal "K" form can round up to
    // "1000.0K". Switching to millions from there keeps the K range capped at
    // "999.9K".
    const MILLIONS_FROM: u64 = 999_950;

    if count >= MILLIONS_FROM {
        format!("{:.1}M", count as f64 / 1_000_000.0)
    } else if count >= 1_000 {
        format!("{:.1}K", count as f64 / 1_000.0)
    } else {
        count.to_string()
    }
}
184
185/// Format a tweet's age for display.
186///
187/// Parses the ISO-8601 timestamp and returns a human-readable duration
188/// like "12 minutes", "2 hours", "1 day". Returns "unknown" on parse failure.
189pub fn format_tweet_age(created_at: &str) -> String {
190    format_tweet_age_at(created_at, Utc::now())
191}
192
193/// Format a tweet's age relative to a specific time (for testability).
194pub fn format_tweet_age_at(created_at: &str, now: DateTime<Utc>) -> String {
195    let created = match created_at.parse::<DateTime<Utc>>() {
196        Ok(dt) => dt,
197        Err(_) => return "unknown".to_string(),
198    };
199
200    let duration = now - created;
201    let minutes = duration.num_minutes();
202    let hours = duration.num_hours();
203    let days = duration.num_days();
204
205    if minutes < 1 {
206        let secs = duration.num_seconds().max(0);
207        format!("{secs} seconds")
208    } else if minutes < 60 {
209        format!("{minutes} minutes")
210    } else if hours < 24 {
211        format!("{hours} hours")
212    } else {
213        format!("{days} days")
214    }
215}
216
/// Truncate text for display, appending "..." if truncated.
///
/// `max_len` is a limit in bytes. Text that already fits is returned
/// unchanged. Otherwise the text is cut at `max_len`, or at the closest
/// earlier UTF-8 character boundary when `max_len` would land inside a
/// multi-byte character (emoji, accented letters, ...), and "..." is
/// appended. The kept prefix is therefore never longer than `max_len` bytes.
pub fn truncate_text(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Slicing a `str` off a character boundary panics, so step back until the
    // cut point is valid. Index 0 is always a boundary, so this terminates.
    let mut cut = max_len;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...", &text[..cut])
}
225
226impl TweetScore {
227    /// Format a human-readable breakdown of the score.
228    ///
229    /// Shows the total score, per-signal breakdown with context,
230    /// and the REPLY/SKIP verdict.
231    pub fn format_breakdown(
232        &self,
233        config: &ScoringConfig,
234        tweet: &TweetData,
235        matched_keywords: &[String],
236    ) -> String {
237        let truncated = truncate_text(&tweet.text, 50);
238        let formatted_followers = format_follower_count(tweet.author_followers);
239        let age = format_tweet_age(&tweet.created_at);
240        let matched_list = if matched_keywords.is_empty() {
241            "none".to_string()
242        } else {
243            matched_keywords.join(", ")
244        };
245
246        let total_engagement = tweet.likes + tweet.retweets + tweet.replies;
247        let followers_for_rate = tweet.author_followers.max(1) as f64;
248        let rate_pct = (total_engagement as f64 / followers_for_rate) * 100.0;
249
250        let verdict = if self.meets_threshold {
251            "REPLY"
252        } else {
253            "SKIP"
254        };
255
256        let reply_count_display = tweet.replies;
257
258        format!(
259            "Tweet: \"{}\" by @{} ({} followers)\n\
260             Score: {:.0}/100\n\
261             \x20 Keyword relevance:  {:.0}/{}  (matched: {})\n\
262             \x20 Author reach:       {:.0}/{}  ({} followers, bell curve)\n\
263             \x20 Recency:            {:.0}/{}  (posted {} ago)\n\
264             \x20 Engagement rate:    {:.0}/{}  ({:.1}% engagement vs 1.5% baseline)\n\
265             \x20 Reply count:        {:.0}/{}  ({} existing replies)\n\
266             \x20 Content type:       {:.0}/{}  ({})\n\
267             Verdict: {} (threshold: {})",
268            truncated,
269            tweet.author_username,
270            formatted_followers,
271            self.total,
272            self.keyword_relevance,
273            config.keyword_relevance_max as u32,
274            matched_list,
275            self.follower,
276            config.follower_count_max as u32,
277            formatted_followers,
278            self.recency,
279            config.recency_max as u32,
280            age,
281            self.engagement,
282            config.engagement_rate_max as u32,
283            rate_pct,
284            self.reply_count,
285            config.reply_count_max as u32,
286            reply_count_display,
287            self.content_type,
288            config.content_type_max as u32,
289            if tweet.has_media || tweet.is_quote_tweet {
290                "media/quote"
291            } else {
292                "text-only"
293            },
294            verdict,
295            config.threshold,
296        )
297    }
298}
299
300impl std::fmt::Display for TweetScore {
301    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
302        write!(
303            f,
304            "Score: {:.0}/100 [kw:{:.0} fol:{:.0} rec:{:.0} eng:{:.0} rep:{:.0} ct:{:.0}] {}",
305            self.total,
306            self.keyword_relevance,
307            self.follower,
308            self.recency,
309            self.engagement,
310            self.reply_count,
311            self.content_type,
312            if self.meets_threshold {
313                "REPLY"
314            } else {
315                "SKIP"
316            }
317        )
318    }
319}
320
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::ScoringConfig;
    use chrono::Duration;

    // --- Fixtures ---

    /// Baseline config: threshold 60 and per-signal maximums that sum to 90.
    fn default_scoring_config() -> ScoringConfig {
        ScoringConfig {
            threshold: 60,
            keyword_relevance_max: 25.0,
            follower_count_max: 15.0,
            recency_max: 10.0,
            engagement_rate_max: 15.0,
            reply_count_max: 15.0,
            content_type_max: 10.0,
        }
    }

    /// A text-only tweet posted ten minutes before `now` by a 5K-follower author.
    fn test_tweet(now: DateTime<Utc>) -> TweetData {
        TweetData {
            text: "Building amazing Rust CLI tools for developers".to_string(),
            created_at: (now - Duration::minutes(10)).to_rfc3339(),
            likes: 20,
            retweets: 5,
            replies: 3,
            author_username: "devuser".to_string(),
            author_followers: 5000,
            has_media: false,
            is_quote_tweet: false,
        }
    }

    /// Engine built from an explicit config and a list of keyword literals.
    fn engine_with(config: ScoringConfig, keywords: &[&str]) -> ScoringEngine {
        let keywords: Vec<String> = keywords.iter().map(|kw| (*kw).to_string()).collect();
        ScoringEngine::new(config, keywords)
    }

    /// Engine built from the baseline config.
    fn engine_for(keywords: &[&str]) -> ScoringEngine {
        engine_with(default_scoring_config(), keywords)
    }

    /// Hand-built score that meets the threshold (total 75).
    fn passing_score() -> TweetScore {
        TweetScore {
            total: 75.0,
            keyword_relevance: 20.0,
            follower: 12.0,
            recency: 8.0,
            engagement: 10.0,
            reply_count: 15.0,
            content_type: 10.0,
            meets_threshold: true,
        }
    }

    /// Age string for a tweet created `ago` before a freshly sampled "now".
    fn age_of(ago: Duration) -> String {
        let now = Utc::now();
        let created = (now - ago).to_rfc3339();
        format_tweet_age_at(&created, now)
    }

    // --- ScoringEngine tests ---

    #[test]
    fn score_total_is_sum_of_signals() {
        let now = Utc::now();
        let score = engine_for(&["rust", "cli"]).score_tweet_at(&test_tweet(now), now);

        let expected_total = score.keyword_relevance
            + score.follower
            + score.recency
            + score.engagement
            + score.reply_count
            + score.content_type;
        assert!((score.total - expected_total).abs() < 0.01);
    }

    #[test]
    fn score_total_clamped_to_100() {
        // Use very high max values to force total > 100
        let inflated = ScoringConfig {
            threshold: 70,
            keyword_relevance_max: 80.0,
            follower_count_max: 80.0,
            recency_max: 80.0,
            engagement_rate_max: 80.0,
            reply_count_max: 80.0,
            content_type_max: 80.0,
        };
        let now = Utc::now();

        let score = engine_with(inflated, &["rust"]).score_tweet_at(&test_tweet(now), now);
        assert!(score.total <= 100.0);
    }

    #[test]
    fn score_total_includes_new_signals() {
        let now = Utc::now();
        let score = engine_for(&["rust"]).score_tweet_at(&test_tweet(now), now);

        // reply_count and content_type should contribute
        assert!(score.reply_count > 0.0);
        assert!(score.content_type > 0.0);
    }

    #[test]
    fn score_zero_reply_higher_than_many_replies() {
        let engine = engine_for(&["rust"]);
        let now = Utc::now();

        let tweet_few = TweetData {
            replies: 0,
            ..test_tweet(now)
        };
        let tweet_many = TweetData {
            replies: 50,
            ..test_tweet(now)
        };

        let score_few = engine.score_tweet_at(&tweet_few, now);
        let score_many = engine.score_tweet_at(&tweet_many, now);
        assert!(score_few.total > score_many.total);
    }

    #[test]
    fn score_1k_follower_higher_than_100k() {
        let engine = engine_for(&["rust"]);
        let now = Utc::now();

        let tweet_1k = TweetData {
            author_followers: 1_000,
            ..test_tweet(now)
        };
        let tweet_100k = TweetData {
            author_followers: 100_000,
            ..test_tweet(now)
        };

        let score_1k = engine.score_tweet_at(&tweet_1k, now);
        let score_100k = engine.score_tweet_at(&tweet_100k, now);
        assert!(
            score_1k.follower > score_100k.follower,
            "1K ({:.1}) should beat 100K ({:.1})",
            score_1k.follower,
            score_100k.follower
        );
    }

    #[test]
    fn score_quote_tweet_zero_content_type() {
        let now = Utc::now();
        let quote = TweetData {
            is_quote_tweet: true,
            ..test_tweet(now)
        };

        let score = engine_for(&["rust"]).score_tweet_at(&quote, now);
        assert!(score.content_type.abs() < 0.01);
    }

    #[test]
    fn score_meets_threshold_above() {
        let lenient = ScoringConfig {
            threshold: 30,
            ..default_scoring_config()
        };
        let now = Utc::now();

        let score = engine_with(lenient, &["rust", "cli"]).score_tweet_at(&test_tweet(now), now);
        assert!(score.total >= 30.0);
        assert!(score.meets_threshold);
    }

    #[test]
    fn score_meets_threshold_below() {
        let strict = ScoringConfig {
            threshold: 99,
            ..default_scoring_config()
        };
        let now = Utc::now();
        // Old, unengaging tweet with no keyword hit.
        let stale = TweetData {
            created_at: (now - Duration::hours(12)).to_rfc3339(),
            likes: 0,
            retweets: 0,
            replies: 0,
            ..test_tweet(now)
        };

        let score = engine_with(strict, &["nonexistent"]).score_tweet_at(&stale, now);
        assert!(!score.meets_threshold);
    }

    #[test]
    fn score_with_no_keywords() {
        let now = Utc::now();
        let score = engine_for(&[]).score_tweet_at(&test_tweet(now), now);
        assert_eq!(score.keyword_relevance, 0.0);
    }

    // --- find_matched_keywords tests ---

    #[test]
    fn find_matched_some() {
        let keywords = vec!["rust".to_string(), "python".to_string(), "cli".to_string()];
        let matched = find_matched_keywords("Building a Rust CLI tool", &keywords);

        assert!(matched.contains(&"rust".to_string()));
        assert!(matched.contains(&"cli".to_string()));
        assert!(!matched.contains(&"python".to_string()));
    }

    #[test]
    fn find_matched_none() {
        let keywords = vec!["java".to_string()];
        assert!(find_matched_keywords("Building a Rust CLI tool", &keywords).is_empty());
    }

    // --- format_follower_count tests ---

    #[test]
    fn format_followers_under_1k() {
        assert_eq!(format_follower_count(500), "500");
    }

    #[test]
    fn format_followers_1k() {
        assert_eq!(format_follower_count(1200), "1.2K");
    }

    #[test]
    fn format_followers_45k() {
        assert_eq!(format_follower_count(45300), "45.3K");
    }

    #[test]
    fn format_followers_1m() {
        assert_eq!(format_follower_count(1_200_000), "1.2M");
    }

    // --- format_tweet_age tests ---

    #[test]
    fn format_age_seconds() {
        assert_eq!(age_of(Duration::seconds(30)), "30 seconds");
    }

    #[test]
    fn format_age_minutes() {
        assert_eq!(age_of(Duration::minutes(12)), "12 minutes");
    }

    #[test]
    fn format_age_hours() {
        assert_eq!(age_of(Duration::hours(3)), "3 hours");
    }

    #[test]
    fn format_age_days() {
        assert_eq!(age_of(Duration::days(2)), "2 days");
    }

    #[test]
    fn format_age_invalid() {
        assert_eq!(format_tweet_age_at("bad", Utc::now()), "unknown");
    }

    // --- truncate_text tests ---

    #[test]
    fn truncate_short_text() {
        assert_eq!(truncate_text("short", 50), "short");
    }

    #[test]
    fn truncate_long_text() {
        let text = "This is a very long tweet that needs to be truncated for display";
        let result = truncate_text(text, 20);
        assert_eq!(result, "This is a very long ...");
        assert!(result.len() <= 23); // 20 + "..."
    }

    // --- format_breakdown tests ---

    #[test]
    fn format_breakdown_contains_verdict() {
        let config = default_scoring_config();
        let tweet = test_tweet(Utc::now());

        let output = passing_score().format_breakdown(&config, &tweet, &["rust".to_string()]);
        assert!(output.contains("REPLY"));
        assert!(output.contains("75/100"));
        assert!(output.contains("@devuser"));
        assert!(output.contains("Reply count"));
        assert!(output.contains("Content type"));
    }

    #[test]
    fn format_breakdown_skip_verdict() {
        let config = default_scoring_config();
        let tweet = test_tweet(Utc::now());
        let failing = TweetScore {
            total: 40.0,
            keyword_relevance: 10.0,
            follower: 8.0,
            recency: 5.0,
            engagement: 7.0,
            reply_count: 5.0,
            content_type: 5.0,
            meets_threshold: false,
        };

        let output = failing.format_breakdown(&config, &tweet, &[]);
        assert!(output.contains("SKIP"));
        assert!(output.contains("40/100"));
    }

    // --- Display impl tests ---

    #[test]
    fn display_impl() {
        let score = passing_score();
        let display = format!("{score}");

        assert!(display.contains("75/100"));
        assert!(display.contains("REPLY"));
        assert!(display.contains("rep:"));
        assert!(display.contains("ct:"));
    }
}
665}