Skip to main content

tuitbot_core/workflow/
discover.rs

1//! Discover step: search tweets via toolkit, score, persist to DB.
2//!
3//! This is the first step in the reply pipeline: find tweets worth replying to.
4//! Routes all X API calls through `toolkit::read::search_tweets`.
5
6use std::collections::HashMap;
7
8use crate::config::Config;
9use crate::scoring::{find_matched_keywords, ScoringEngine, TweetData};
10use crate::storage;
11use crate::storage::tweets::DiscoveredTweet;
12use crate::storage::DbPool;
13use crate::toolkit;
14use crate::x_api::XApiClient;
15
16use super::{ScoreBreakdown, ScoredCandidate, WorkflowError};
17
/// Caller-supplied parameters for one run of the discover step.
///
/// Every field is optional; `execute` substitutes a default for each `None`.
#[derive(Clone, Debug)]
pub struct DiscoverInput {
    /// Search query sent to the tweet search. When `None`, `execute` joins the
    /// configured product keywords with `" OR "` and searches for that instead.
    pub query: Option<String>,
    /// Lowest score a tweet may have and still be returned. When `None`,
    /// `config.scoring.threshold` is used.
    pub min_score: Option<f64>,
    /// Upper bound on the number of results. Defaults to 10 when `None` and is
    /// clamped to the inclusive range 1..=100.
    pub limit: Option<u32>,
    /// When set, forwarded to the search so that only tweets newer than this
    /// tweet ID are returned.
    pub since_id: Option<String>,
}
30
31/// Output from the discover step.
32#[derive(Debug, Clone)]
33pub struct DiscoverOutput {
34    /// Scored and ranked candidates.
35    pub candidates: Vec<ScoredCandidate>,
36    /// The query that was used.
37    pub query_used: String,
38    /// The threshold that was applied.
39    pub threshold: f64,
40}
41
42/// Execute the discover step: search, score, persist, rank.
43///
44/// All X API access goes through `toolkit::read::search_tweets`.
45pub async fn execute(
46    db: &DbPool,
47    x_client: &dyn XApiClient,
48    config: &Config,
49    input: DiscoverInput,
50) -> Result<DiscoverOutput, WorkflowError> {
51    // Build query from input or product keywords
52    let search_query = match &input.query {
53        Some(q) => q.clone(),
54        None => {
55            let kw = &config.business.product_keywords;
56            if kw.is_empty() {
57                return Err(WorkflowError::InvalidInput(
58                    "No search query provided and no product_keywords configured.".to_string(),
59                ));
60            }
61            kw.join(" OR ")
62        }
63    };
64
65    let max_results = input.limit.unwrap_or(10).clamp(1, 100);
66    let threshold = input.min_score.unwrap_or(config.scoring.threshold as f64);
67
68    // Search tweets via toolkit (not direct XApiClient)
69    let search_response = toolkit::read::search_tweets(
70        x_client,
71        &search_query,
72        max_results,
73        input.since_id.as_deref(),
74        None, // no pagination token
75    )
76    .await?;
77
78    if search_response.data.is_empty() {
79        return Ok(DiscoverOutput {
80            candidates: vec![],
81            query_used: search_query,
82            threshold,
83        });
84    }
85
86    // Build author lookup from includes
87    let users: HashMap<String, &crate::x_api::types::User> = search_response
88        .includes
89        .as_ref()
90        .map(|inc| inc.users.iter().map(|u| (u.id.clone(), u)).collect())
91        .unwrap_or_default();
92
93    // Build scoring engine
94    let keywords: Vec<String> = config
95        .business
96        .product_keywords
97        .iter()
98        .chain(config.business.competitor_keywords.iter())
99        .chain(config.business.effective_industry_topics().iter())
100        .cloned()
101        .collect();
102    let engine = ScoringEngine::new(config.scoring.clone(), keywords.clone());
103
104    let mut candidates = Vec::new();
105
106    for tweet in &search_response.data {
107        let user = users.get(&tweet.author_id);
108        let author_username = user.map(|u| u.username.as_str()).unwrap_or("unknown");
109        let author_followers = user.map(|u| u.public_metrics.followers_count).unwrap_or(0);
110
111        let tweet_data = TweetData {
112            text: tweet.text.clone(),
113            created_at: tweet.created_at.clone(),
114            likes: tweet.public_metrics.like_count,
115            retweets: tweet.public_metrics.retweet_count,
116            replies: tweet.public_metrics.reply_count,
117            author_username: author_username.to_string(),
118            author_followers,
119            has_media: false,
120            is_quote_tweet: false,
121        };
122
123        let score = engine.score_tweet(&tweet_data);
124        let matched = find_matched_keywords(&tweet.text, &keywords);
125
126        // Persist to DB (best-effort)
127        let discovered = DiscoveredTweet {
128            id: tweet.id.clone(),
129            author_id: tweet.author_id.clone(),
130            author_username: author_username.to_string(),
131            content: tweet.text.clone(),
132            like_count: tweet.public_metrics.like_count as i64,
133            retweet_count: tweet.public_metrics.retweet_count as i64,
134            reply_count: tweet.public_metrics.reply_count as i64,
135            impression_count: Some(tweet.public_metrics.impression_count as i64),
136            relevance_score: Some(score.total as f64),
137            matched_keyword: matched.first().cloned(),
138            discovered_at: tweet.created_at.clone(),
139            replied_to: 0,
140        };
141        let _ = storage::tweets::insert_discovered_tweet(db, &discovered).await;
142
143        // Check if already replied
144        let already_replied = storage::replies::has_replied_to(db, &tweet.id)
145            .await
146            .unwrap_or(false);
147
148        // Determine recommended action
149        let recommended_action = if (score.total as f64) >= threshold + 15.0 {
150            "strong_reply"
151        } else if (score.total as f64) >= threshold {
152            "consider"
153        } else {
154            "skip"
155        };
156
157        candidates.push(ScoredCandidate {
158            tweet_id: tweet.id.clone(),
159            author_username: author_username.to_string(),
160            author_followers,
161            text: tweet.text.clone(),
162            created_at: tweet.created_at.clone(),
163            score_total: score.total,
164            score_breakdown: ScoreBreakdown {
165                keyword_relevance: score.keyword_relevance,
166                follower: score.follower,
167                recency: score.recency,
168                engagement: score.engagement,
169                reply_count: score.reply_count,
170                content_type: score.content_type,
171            },
172            matched_keywords: matched,
173            recommended_action: recommended_action.to_string(),
174            already_replied,
175        });
176    }
177
178    // Filter by threshold, sort desc, take limit
179    candidates.retain(|c| (c.score_total as f64) >= threshold);
180    candidates.sort_by(|a, b| {
181        b.score_total
182            .partial_cmp(&a.score_total)
183            .unwrap_or(std::cmp::Ordering::Equal)
184    });
185    candidates.truncate(max_results as usize);
186
187    Ok(DiscoverOutput {
188        candidates,
189        query_used: search_query,
190        threshold,
191    })
192}