Skip to main content

tuitbot_core/toolkit/profile_inference/
mod.rs

//! Profile inference engine for onboarding.
//!
//! Analyzes a connected X account's profile and recent tweets to produce
//! normalized onboarding suggestions with confidence scores and provenance.
//!
//! Two-pass architecture:
//! 1. **Heuristics** — deterministic extraction from bio, display name, and profile URL.
//! 2. **LLM enrichment** — optional semantic analysis for fields that benefit from it
//!    (target audience, keywords, topics, brand voice).
10
11mod heuristics;
12mod llm_enrichment;
13
14#[cfg(test)]
15mod tests;
16
17use serde::{Deserialize, Serialize};
18
19use crate::x_api::types::{Tweet, User};
20
21// Re-export submodule entry points.
22pub use heuristics::extract_heuristics;
23pub use llm_enrichment::enrich_with_llm;
24
25/// How confident the inference is.
26#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
27#[serde(rename_all = "snake_case")]
28pub enum Confidence {
29    High,
30    Medium,
31    Low,
32}
33
34/// Where the inferred value came from.
35#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
36#[serde(rename_all = "snake_case")]
37pub enum Provenance {
38    Bio,
39    Tweets,
40    BioAndTweets,
41    ProfileUrl,
42    DisplayName,
43    Default,
44}
45
46/// A single inferred field with confidence and provenance metadata.
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct InferredField<T: Serialize> {
49    pub value: T,
50    pub confidence: Confidence,
51    pub provenance: Provenance,
52}
53
54/// The complete set of inferred onboarding fields.
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct InferredProfile {
57    pub account_type: InferredField<String>,
58    pub product_name: InferredField<String>,
59    pub product_description: InferredField<String>,
60    pub product_url: InferredField<Option<String>>,
61    pub target_audience: InferredField<String>,
62    pub product_keywords: InferredField<Vec<String>>,
63    pub industry_topics: InferredField<Vec<String>>,
64    pub brand_voice: InferredField<Option<String>>,
65}
66
67/// Raw input data for the inference pipeline.
68pub struct ProfileInput {
69    pub user: User,
70    pub tweets: Vec<Tweet>,
71}
72
73/// Compute the base confidence level from available input data.
74///
75/// Per the inference contract:
76/// - bio > 20 chars AND >= 10 tweets → High
77/// - bio > 0 chars OR >= 5 tweets → Medium
78/// - otherwise → Low
79pub fn compute_base_confidence(bio_len: usize, tweet_count: usize) -> Confidence {
80    if bio_len > 20 && tweet_count >= 10 {
81        Confidence::High
82    } else if bio_len > 0 || tweet_count >= 5 {
83        Confidence::Medium
84    } else {
85        Confidence::Low
86    }
87}