Skip to main content

tuitbot_core/strategy/
metrics.rs

1//! Date-ranged metric queries over existing tables.
2//!
3//! All functions query existing tables (`action_log`, `follower_snapshots`,
4//! `reply_performance`, `tweet_performance`, `original_tweets`, `replies_sent`)
5//! with date bounds. No new data collection is needed.
6
7use crate::error::StorageError;
8use crate::storage::DbPool;
9
10/// Action counts for a date range.
11#[derive(Debug, Clone, Default, serde::Serialize)]
12pub struct ActionCounts {
13    pub replies: i64,
14    pub tweets: i64,
15    pub threads: i64,
16    pub target_replies: i64,
17}
18
19/// A topic's performance within a date range.
20#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
21pub struct TopicPerformance {
22    pub topic: String,
23    pub format: String,
24    pub avg_score: f64,
25    pub post_count: i64,
26}
27
28/// A top-performing content item.
29#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
30pub struct ContentHighlight {
31    pub content_type: String,
32    pub content_preview: String,
33    pub performance_score: f64,
34    pub likes: i64,
35    pub replies_received: i64,
36}
37
38/// Count actions by type within a date range.
39///
40/// Queries the `action_log` table. The range is `[start, end)`.
41pub async fn count_actions_in_range(
42    pool: &DbPool,
43    start: &str,
44    end: &str,
45) -> Result<ActionCounts, StorageError> {
46    let rows: Vec<(String, i64)> = sqlx::query_as(
47        "SELECT action_type, COUNT(*) as cnt FROM action_log \
48         WHERE created_at >= ? AND created_at < ? AND status = 'success' \
49         GROUP BY action_type",
50    )
51    .bind(start)
52    .bind(end)
53    .fetch_all(pool)
54    .await
55    .map_err(|e| StorageError::Query { source: e })?;
56
57    let mut counts = ActionCounts::default();
58    for (action_type, count) in rows {
59        match action_type.as_str() {
60            "reply" => counts.replies = count,
61            "tweet" => counts.tweets = count,
62            "thread" => counts.threads = count,
63            "target_reply" => counts.target_replies = count,
64            _ => {}
65        }
66    }
67    Ok(counts)
68}
69
70/// Get the follower count at or before a given date.
71///
72/// Returns the nearest snapshot whose date is `<= date`.
73pub async fn get_follower_at_date(pool: &DbPool, date: &str) -> Result<Option<i64>, StorageError> {
74    let row: Option<(i64,)> = sqlx::query_as(
75        "SELECT follower_count FROM follower_snapshots \
76         WHERE snapshot_date <= ? ORDER BY snapshot_date DESC LIMIT 1",
77    )
78    .bind(date)
79    .fetch_optional(pool)
80    .await
81    .map_err(|e| StorageError::Query { source: e })?;
82
83    Ok(row.map(|r| r.0))
84}
85
86/// Average reply performance score in a date range.
87pub async fn avg_reply_score_in_range(
88    pool: &DbPool,
89    start: &str,
90    end: &str,
91) -> Result<f64, StorageError> {
92    let row: (f64,) = sqlx::query_as(
93        "SELECT COALESCE(AVG(rp.performance_score), 0.0) \
94         FROM reply_performance rp \
95         JOIN replies_sent rs ON rs.reply_tweet_id = rp.reply_id \
96         WHERE rs.created_at >= ? AND rs.created_at < ?",
97    )
98    .bind(start)
99    .bind(end)
100    .fetch_one(pool)
101    .await
102    .map_err(|e| StorageError::Query { source: e })?;
103
104    Ok(row.0)
105}
106
107/// Average tweet performance score in a date range.
108pub async fn avg_tweet_score_in_range(
109    pool: &DbPool,
110    start: &str,
111    end: &str,
112) -> Result<f64, StorageError> {
113    let row: (f64,) = sqlx::query_as(
114        "SELECT COALESCE(AVG(tp.performance_score), 0.0) \
115         FROM tweet_performance tp \
116         JOIN original_tweets ot ON ot.tweet_id = tp.tweet_id \
117         WHERE ot.created_at >= ? AND ot.created_at < ?",
118    )
119    .bind(start)
120    .bind(end)
121    .fetch_one(pool)
122    .await
123    .map_err(|e| StorageError::Query { source: e })?;
124
125    Ok(row.0)
126}
127
128/// Reply acceptance rate: fraction of replies that received at least one reply back.
129pub async fn reply_acceptance_rate(
130    pool: &DbPool,
131    start: &str,
132    end: &str,
133) -> Result<f64, StorageError> {
134    let row: (i64, i64) = sqlx::query_as(
135        "SELECT \
136            COUNT(*) as total, \
137            SUM(CASE WHEN rp.replies_received > 0 THEN 1 ELSE 0 END) as accepted \
138         FROM replies_sent rs \
139         JOIN reply_performance rp ON rp.reply_id = rs.reply_tweet_id \
140         WHERE rs.created_at >= ? AND rs.created_at < ?",
141    )
142    .bind(start)
143    .bind(end)
144    .fetch_one(pool)
145    .await
146    .map_err(|e| StorageError::Query { source: e })?;
147
148    if row.0 == 0 {
149        return Ok(0.0);
150    }
151    Ok(row.1 as f64 / row.0 as f64)
152}
153
154/// Top topics by average performance score in a date range.
155pub async fn top_topics_in_range(
156    pool: &DbPool,
157    start: &str,
158    end: &str,
159    limit: u32,
160) -> Result<Vec<TopicPerformance>, StorageError> {
161    let rows: Vec<(String, String, f64, i64)> = sqlx::query_as(
162        "SELECT ot.topic, COALESCE(ot.topic, '') as format, \
163                AVG(tp.performance_score) as avg_score, COUNT(*) as post_count \
164         FROM tweet_performance tp \
165         JOIN original_tweets ot ON ot.tweet_id = tp.tweet_id \
166         WHERE ot.created_at >= ? AND ot.created_at < ? AND ot.topic IS NOT NULL \
167         GROUP BY ot.topic \
168         HAVING post_count >= 1 \
169         ORDER BY avg_score DESC \
170         LIMIT ?",
171    )
172    .bind(start)
173    .bind(end)
174    .bind(limit)
175    .fetch_all(pool)
176    .await
177    .map_err(|e| StorageError::Query { source: e })?;
178
179    Ok(rows
180        .into_iter()
181        .map(|r| TopicPerformance {
182            topic: r.0,
183            format: r.1,
184            avg_score: r.2,
185            post_count: r.3,
186        })
187        .collect())
188}
189
190/// Bottom topics by average performance score in a date range (minimum 3 posts).
191pub async fn bottom_topics_in_range(
192    pool: &DbPool,
193    start: &str,
194    end: &str,
195    limit: u32,
196) -> Result<Vec<TopicPerformance>, StorageError> {
197    let rows: Vec<(String, String, f64, i64)> = sqlx::query_as(
198        "SELECT ot.topic, COALESCE(ot.topic, '') as format, \
199                AVG(tp.performance_score) as avg_score, COUNT(*) as post_count \
200         FROM tweet_performance tp \
201         JOIN original_tweets ot ON ot.tweet_id = tp.tweet_id \
202         WHERE ot.created_at >= ? AND ot.created_at < ? AND ot.topic IS NOT NULL \
203         GROUP BY ot.topic \
204         HAVING post_count >= 3 \
205         ORDER BY avg_score ASC \
206         LIMIT ?",
207    )
208    .bind(start)
209    .bind(end)
210    .bind(limit)
211    .fetch_all(pool)
212    .await
213    .map_err(|e| StorageError::Query { source: e })?;
214
215    Ok(rows
216        .into_iter()
217        .map(|r| TopicPerformance {
218            topic: r.0,
219            format: r.1,
220            avg_score: r.2,
221            post_count: r.3,
222        })
223        .collect())
224}
225
226/// Top-performing content items (UNION of replies + tweets) in a date range.
227pub async fn top_content_in_range(
228    pool: &DbPool,
229    start: &str,
230    end: &str,
231    limit: u32,
232) -> Result<Vec<ContentHighlight>, StorageError> {
233    let rows: Vec<(String, String, f64, i64, i64)> = sqlx::query_as(
234        "SELECT content_type, content_preview, performance_score, likes, replies_received FROM ( \
235            SELECT 'reply' as content_type, \
236                   SUBSTR(rs.reply_content, 1, 120) as content_preview, \
237                   rp.performance_score, rp.likes_received as likes, \
238                   rp.replies_received, rs.created_at as posted_at \
239            FROM reply_performance rp \
240            JOIN replies_sent rs ON rs.reply_tweet_id = rp.reply_id \
241            WHERE rs.created_at >= ? AND rs.created_at < ? \
242            UNION ALL \
243            SELECT 'tweet' as content_type, \
244                   SUBSTR(ot.content, 1, 120) as content_preview, \
245                   tp.performance_score, tp.likes_received as likes, \
246                   tp.replies_received, ot.created_at as posted_at \
247            FROM tweet_performance tp \
248            JOIN original_tweets ot ON ot.tweet_id = tp.tweet_id \
249            WHERE ot.created_at >= ? AND ot.created_at < ? \
250         ) ORDER BY performance_score DESC LIMIT ?",
251    )
252    .bind(start)
253    .bind(end)
254    .bind(start)
255    .bind(end)
256    .bind(limit)
257    .fetch_all(pool)
258    .await
259    .map_err(|e| StorageError::Query { source: e })?;
260
261    Ok(rows
262        .into_iter()
263        .map(|r| ContentHighlight {
264            content_type: r.0,
265            content_preview: r.1,
266            performance_score: r.2,
267            likes: r.3,
268            replies_received: r.4,
269        })
270        .collect())
271}
272
273/// Count distinct topics posted in a date range.
274pub async fn distinct_topic_count(
275    pool: &DbPool,
276    start: &str,
277    end: &str,
278) -> Result<i64, StorageError> {
279    let row: (i64,) = sqlx::query_as(
280        "SELECT COUNT(DISTINCT topic) FROM original_tweets \
281         WHERE created_at >= ? AND created_at < ? AND topic IS NOT NULL AND topic != ''",
282    )
283    .bind(start)
284    .bind(end)
285    .fetch_one(pool)
286    .await
287    .map_err(|e| StorageError::Query { source: e })?;
288
289    Ok(row.0)
290}
291
292#[cfg(test)]
293mod tests {
294    use super::*;
295    use crate::storage::init_test_db;
296
297    #[tokio::test]
298    async fn count_actions_empty() {
299        let pool = init_test_db().await.expect("init db");
300        let counts = count_actions_in_range(&pool, "2026-01-01T00:00:00Z", "2026-12-31T23:59:59Z")
301            .await
302            .expect("count");
303        assert_eq!(counts.replies, 0);
304        assert_eq!(counts.tweets, 0);
305    }
306
307    #[tokio::test]
308    async fn follower_at_date_empty() {
309        let pool = init_test_db().await.expect("init db");
310        let count = get_follower_at_date(&pool, "2026-12-31")
311            .await
312            .expect("get");
313        assert!(count.is_none());
314    }
315
316    #[tokio::test]
317    async fn avg_reply_score_empty() {
318        let pool = init_test_db().await.expect("init db");
319        let score = avg_reply_score_in_range(&pool, "2026-01-01T00:00:00Z", "2026-12-31T23:59:59Z")
320            .await
321            .expect("avg");
322        assert!((score - 0.0).abs() < 0.01);
323    }
324
325    #[tokio::test]
326    async fn reply_acceptance_rate_empty() {
327        let pool = init_test_db().await.expect("init db");
328        let rate = reply_acceptance_rate(&pool, "2026-01-01T00:00:00Z", "2026-12-31T23:59:59Z")
329            .await
330            .expect("rate");
331        assert!((rate - 0.0).abs() < 0.01);
332    }
333
334    #[tokio::test]
335    async fn top_topics_empty() {
336        let pool = init_test_db().await.expect("init db");
337        let topics = top_topics_in_range(&pool, "2026-01-01T00:00:00Z", "2026-12-31T23:59:59Z", 5)
338            .await
339            .expect("topics");
340        assert!(topics.is_empty());
341    }
342
343    #[tokio::test]
344    async fn top_content_empty() {
345        let pool = init_test_db().await.expect("init db");
346        let items = top_content_in_range(&pool, "2026-01-01T00:00:00Z", "2026-12-31T23:59:59Z", 5)
347            .await
348            .expect("content");
349        assert!(items.is_empty());
350    }
351
352    #[tokio::test]
353    async fn distinct_topic_count_empty() {
354        let pool = init_test_db().await.expect("init db");
355        let count = distinct_topic_count(&pool, "2026-01-01T00:00:00Z", "2026-12-31T23:59:59Z")
356            .await
357            .expect("count");
358        assert_eq!(count, 0);
359    }
360}