use serde::{Deserialize, Serialize};
use std::sync::Arc;
use crate::error::{AiError, Result};
/// Social media platform that a URL is attributed to.
///
/// `Unknown` is the fallback used when a URL matches none of the other
/// variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SocialPlatform {
/// Twitter, including links on the `x.com` domain.
Twitter,
/// YouTube, including `youtu.be` short links.
YouTube,
/// LinkedIn.
LinkedIn,
/// Discord (`discord.com` and `discord.gg` links).
Discord,
/// Telegram (for example `t.me` links).
Telegram,
/// No known platform was recognised in the URL.
Unknown,
}
impl SocialPlatform {
    /// Detects which platform a URL belongs to by inspecting its host name.
    ///
    /// The host is matched against each platform's registered domains, either
    /// exactly or as a parent domain (`www.youtube.com`, `mobile.twitter.com`).
    /// Matching on the host rather than on the raw string avoids false
    /// positives such as `netflix.com` (contains `x.com`), `about.me`
    /// (contains `t.me`), or a platform domain that merely appears inside a
    /// query string.
    ///
    /// The scheme is optional (`twitter.com/user` is accepted), matching is
    /// case-insensitive, and userinfo / port components are ignored.
    ///
    /// Returns [`SocialPlatform::Unknown`] when the host belongs to none of the
    /// known platforms.
    #[must_use]
    pub fn from_url(url: &str) -> Self {
        let lowered = url.trim().to_lowercase();

        // Drop query and fragment first so that a `://` inside them is never
        // mistaken for the scheme separator.
        let location = lowered.split(['?', '#']).next().unwrap_or("");
        let without_scheme = match location.split_once("://") {
            Some((scheme, rest)) if !scheme.contains('/') => rest,
            _ => location,
        };

        // The authority runs up to the first `/`; leading slashes cover
        // protocol-relative input such as `//youtu.be/abc`.
        let authority = without_scheme
            .trim_start_matches('/')
            .split('/')
            .next()
            .unwrap_or("");
        // Strip an optional `user:pass@` prefix and `:port` suffix.
        let host = authority.rsplit('@').next().unwrap_or(authority);
        let host = host.split(':').next().unwrap_or(host).trim_end_matches('.');

        // True when `host` is `domain` itself or one of its subdomains.
        let host_is = |domain: &str| {
            host == domain
                || host
                    .strip_suffix(domain)
                    .map_or(false, |subdomain| subdomain.ends_with('.'))
        };
        let host_is_any = |domains: &[&str]| domains.iter().any(|&domain| host_is(domain));

        if host_is_any(&["twitter.com", "x.com"]) {
            SocialPlatform::Twitter
        } else if host_is_any(&["youtube.com", "youtu.be"]) {
            SocialPlatform::YouTube
        } else if host_is_any(&["linkedin.com"]) {
            SocialPlatform::LinkedIn
        } else if host_is_any(&["discord.com", "discord.gg"]) {
            SocialPlatform::Discord
        } else if host_is_any(&["t.me", "telegram.me", "telegram.org"]) {
            SocialPlatform::Telegram
        } else {
            SocialPlatform::Unknown
        }
    }
}
/// Components of a Twitter/X post URL, as extracted by
/// `SocialMediaParser::parse_twitter_url`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TwitterPost {
/// Numeric status ID taken from the path segment after `status`.
pub post_id: String,
/// Account name preceding `/status/` in the URL. The parser stores the
/// literal `"unknown"` when the URL does not name an account
/// (e.g. `/i/status/<id>`).
pub username: String,
/// The URL exactly as it was supplied to the parser.
pub url: String,
/// Set when the URL contains a retweet marker (`/retweet` or
/// `retweeted_status`).
pub is_retweet: bool,
/// Position within a thread. The URL parser in this file always leaves it
/// `None`.
pub thread_position: Option<u32>,
}
/// Components of a YouTube URL, as produced by
/// `SocialMediaParser::parse_youtube_url`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct YouTubeVideo {
/// Video identifier taken from the `v` query parameter, a `youtu.be` path,
/// or a `/shorts/`, `/live/` or `/embed/` path.
pub video_id: String,
/// Channel taken from a `/channel/<id>` path, or a handle stored with its
/// leading `@`; `None` if the URL names neither.
pub channel: Option<String>,
/// The URL exactly as it was supplied to the parser.
pub url: String,
/// Start offset in seconds parsed from the `t` query parameter, if present
/// and parseable.
pub start_time: Option<u32>,
/// Whether the URL contains a `/shorts/` path segment.
pub is_shorts: bool,
/// Whether the URL contains `/live/` or `live=`.
pub is_live: bool,
/// Value of the `list` query parameter, if present.
pub playlist_id: Option<String>,
}
/// Components of a LinkedIn URL, as produced by
/// `SocialMediaParser::parse_linkedin_url`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LinkedInPost {
/// Activity identifier found after an `activity` marker in post/update
/// URLs; `None` when the URL carries none.
pub activity_id: Option<String>,
/// Name taken from a `/company/<name>` or `/in/<name>` path; when both are
/// present the `/in/` value is the one kept.
pub profile: Option<String>,
/// The URL exactly as it was supplied to the parser.
pub url: String,
/// Kind of content the URL path points at.
pub post_type: LinkedInPostType,
}
/// Kind of LinkedIn content, derived from markers in the URL path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum LinkedInPostType {
/// URL contains `/posts/` or `/feed/update/`.
Post,
/// URL contains `/pulse/`.
Article,
/// URL contains `/company/`.
CompanyPost,
/// None of the known path markers were found.
Unknown,
}
/// Stateless namespace for the URL-parsing helpers; every function on it is
/// an associated function that takes no `self`.
pub struct SocialMediaParser;
impl SocialMediaParser {
/// Parses a Twitter/X status URL into a [`TwitterPost`].
///
/// Accepted hosts are `twitter.com`, `x.com` and their subdomains; the host is
/// compared as a domain, so look-alikes such as `netflix.com` are rejected.
/// The scheme is optional, and query string and fragment are ignored.
///
/// The path must contain a `status` segment followed by an all-digit post ID.
/// The segment before `status` becomes the username, except for the
/// account-agnostic permalinks `/i/status/<id>` and `/i/web/status/<id>`, and
/// for `/status/<id>` directly under the host, where the username is the
/// literal `"unknown"`.
///
/// Returns `None` when the host is not Twitter/X or no numeric status ID is
/// present. `thread_position` is always `None`.
#[must_use]
pub fn parse_twitter_url(url: &str) -> Option<TwitterPost> {
    const TWITTER_DOMAINS: [&str; 2] = ["twitter.com", "x.com"];

    // Query and fragment never contribute path segments; dropping them first
    // also keeps a `://` inside a query from being read as the scheme
    // separator.
    let location = url.trim().split(['?', '#']).next().unwrap_or("");
    let without_scheme = match location.split_once("://") {
        Some((scheme, rest)) if !scheme.contains('/') => rest,
        _ => location,
    };

    let mut segments = without_scheme.trim_start_matches('/').split('/');
    let authority = segments.next()?;
    // Strip an optional `user@` prefix and `:port` suffix from the authority.
    let host = authority.rsplit('@').next().unwrap_or(authority);
    let host = host
        .split(':')
        .next()
        .unwrap_or(host)
        .trim_end_matches('.')
        .to_ascii_lowercase();
    let is_twitter_host = TWITTER_DOMAINS.iter().any(|&domain| {
        host == domain
            || host
                .strip_suffix(domain)
                .map_or(false, |subdomain| subdomain.ends_with('.'))
    });
    if !is_twitter_host {
        return None;
    }

    let path: Vec<&str> = segments.collect();
    let status_pos = path.iter().position(|&segment| segment == "status")?;
    let post_id = *path.get(status_pos + 1)?;
    if post_id.is_empty() || !post_id.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    // Only path segments are considered, so the host can never be mistaken
    // for an account name.
    let username = match &path[..status_pos] {
        [.., "i"] | [.., "i", "web"] => "unknown",
        [.., name] if !name.is_empty() => *name,
        _ => "unknown",
    };

    let url_lower = url.to_lowercase();
    let is_retweet = url_lower.contains("/retweet") || url_lower.contains("retweeted_status");

    Some(TwitterPost {
        post_id: post_id.to_string(),
        username: username.to_string(),
        url: url.to_string(),
        is_retweet,
        thread_position: None,
    })
}
/// Parses a YouTube URL into a [`YouTubeVideo`].
///
/// Recognised shapes:
/// - `youtu.be/<id>` (the ID must be exactly 11 bytes long),
/// - `youtube.com/watch?v=<id>`,
/// - `youtube.com/shorts/<id>`, `/live/<id>` and `/embed/<id>`; when several
///   are present the later one in this list wins, and any of them overrides
///   the `v` parameter.
///
/// Also extracts the `list` query parameter (playlist), a `/channel/<id>` or
/// `/@handle` channel reference (handles keep their leading `@`), and the `t`
/// query parameter as a start offset in seconds.
///
/// Path-derived values stop at the first `/`, `?` or `#`, so a URL fragment
/// never leaks into an ID.
///
/// Returns `None` when the URL mentions neither `youtube.com` nor `youtu.be`,
/// or when no video ID could be found.
#[must_use]
pub fn parse_youtube_url(url: &str) -> Option<YouTubeVideo> {
    /// Returns the non-empty path segment that directly follows the first
    /// occurrence of `marker`, cut at the next `/`, `?` or `#`.
    fn segment_after<'a>(url: &'a str, marker: &str) -> Option<&'a str> {
        let (_, rest) = url.split_once(marker)?;
        let segment = rest.split(['?', '/', '#']).next().unwrap_or("");
        if segment.is_empty() {
            None
        } else {
            Some(segment)
        }
    }

    let url_lower = url.to_lowercase();
    let on_short_domain = url_lower.contains("youtu.be");
    let on_main_domain = url_lower.contains("youtube.com");
    if !on_short_domain && !on_main_domain {
        return None;
    }

    let is_shorts = url_lower.contains("/shorts/");
    let is_live = url_lower.contains("/live/") || url_lower.contains("live=");

    let mut video_id: Option<String> = None;
    let mut channel = None;
    let mut playlist_id = None;

    if on_short_domain {
        // The ID is the last path segment. Query and fragment are removed
        // first and a trailing slash is tolerated, so `youtu.be/<id>/` and
        // `youtu.be/<id>?list=a/b` both resolve to `<id>`.
        let path = url.split(['?', '#']).next().unwrap_or(url);
        let candidate = path.trim_end_matches('/').rsplit('/').next().unwrap_or("");
        if candidate.len() == 11 {
            video_id = Some(candidate.to_string());
        }
    }

    if on_main_domain {
        if let Some(v_param) = Self::extract_url_param(url, "v") {
            video_id = Some(v_param);
        }
        // Path-based IDs override the query parameter; later markers override
        // earlier ones.
        for marker in &["/shorts/", "/live/", "/embed/"] {
            if let Some(id) = segment_after(url, marker) {
                video_id = Some(id.to_string());
            }
        }

        playlist_id = Self::extract_url_param(url, "list");

        // A `/channel/` marker takes precedence; handles are only considered
        // when no such marker is present at all.
        channel = if url_lower.contains("/channel/") {
            segment_after(url, "/channel/").map(str::to_string)
        } else {
            segment_after(url, "/@").map(|handle| format!("@{handle}"))
        };
    }

    let start_time =
        Self::extract_url_param(url, "t").and_then(|t| Self::parse_time_param(&t));

    video_id.map(|vid| YouTubeVideo {
        video_id: vid,
        channel,
        url: url.to_string(),
        start_time,
        is_shorts,
        is_live,
        playlist_id,
    })
}
/// Parses a LinkedIn URL into a [`LinkedInPost`].
///
/// The post type is derived from path markers, with `/company/` taking
/// precedence over `/pulse/` (article), which takes precedence over
/// `/posts/` and `/feed/update/` (post).
///
/// For post URLs the numeric activity ID is extracted from either the slug
/// form (`...-activity-<digits>-...`) or the URN form used by feed updates
/// (`urn:li:activity:<digits>`, also when percent-encoded). Only the digits
/// are kept, and an `activity-` that is merely part of the post title is
/// skipped.
///
/// `profile` is the segment after `/in/` if present, otherwise the segment
/// after `/company/`; it stops at the first `/`, `?` or `#`.
///
/// Returns `None` only when the URL does not mention `linkedin.com`.
#[must_use]
pub fn parse_linkedin_url(url: &str) -> Option<LinkedInPost> {
    /// Returns the non-empty path segment that directly follows the first
    /// occurrence of `marker`, cut at the next `/`, `?` or `#`.
    fn segment_after<'a>(url: &'a str, marker: &str) -> Option<&'a str> {
        let (_, rest) = url.split_once(marker)?;
        let segment = rest.split(['?', '/', '#']).next().unwrap_or("");
        if segment.is_empty() {
            None
        } else {
            Some(segment)
        }
    }

    /// Returns the run of ASCII digits following the first occurrence of
    /// `marker` that is actually followed by a digit.
    fn numeric_id_after(url: &str, marker: &str) -> Option<String> {
        url.match_indices(marker).find_map(|(start, matched)| {
            let rest = &url[start + matched.len()..];
            let digit_count = rest.bytes().take_while(u8::is_ascii_digit).count();
            if digit_count == 0 {
                None
            } else {
                Some(rest[..digit_count].to_string())
            }
        })
    }

    let url_lower = url.to_lowercase();
    if !url_lower.contains("linkedin.com") {
        return None;
    }

    let is_post = url_lower.contains("/posts/") || url_lower.contains("/feed/update/");

    let activity_id = if is_post {
        ["activity-", "activity:", "activity%3A", "activity%3a"]
            .iter()
            .find_map(|&marker| numeric_id_after(url, marker))
    } else {
        None
    };

    let post_type = if url_lower.contains("/company/") {
        LinkedInPostType::CompanyPost
    } else if url_lower.contains("/pulse/") {
        LinkedInPostType::Article
    } else if is_post {
        LinkedInPostType::Post
    } else {
        LinkedInPostType::Unknown
    };

    // A personal profile reference wins over a company reference.
    let profile = segment_after(url, "/in/")
        .or_else(|| segment_after(url, "/company/"))
        .map(str::to_string);

    Some(LinkedInPost {
        activity_id,
        profile,
        url: url.to_string(),
        post_type,
    })
}
/// Returns the raw value of the first `param=value` pair in the URL's query
/// string.
///
/// The query is the text between the first `?` and the fragment (`#...`), so
/// neither a fragment trailing the value nor a `?` inside the fragment affects
/// the result. Pairs without `=` are skipped. The value is returned as-is,
/// without percent-decoding.
///
/// Returns `None` when the URL has no query string or no pair whose key equals
/// `param` exactly (the comparison is case-sensitive).
fn extract_url_param(url: &str, param: &str) -> Option<String> {
    // The fragment is not part of the query; cut it off before looking for `?`.
    let without_fragment = url.split('#').next().unwrap_or(url);
    let (_, query) = without_fragment.split_once('?')?;
    query
        .split('&')
        .filter_map(|pair| pair.split_once('='))
        .find(|(key, _)| *key == param)
        .map(|(_, value)| value.to_string())
}
/// Parses a YouTube-style time offset into seconds.
///
/// Accepts either a plain number of seconds (`"90"`) or a sequence of numbers
/// suffixed with `h`, `m` or `s` (`"1h2m3s"`). A trailing number without a
/// unit counts as seconds, and a number followed by any other character is
/// discarded.
///
/// All arithmetic is overflow-checked: a value that does not fit in `u32`
/// yields `None` instead of panicking or wrapping.
///
/// Returns `None` when the unit form adds up to zero or overflows. A plain
/// `"0"` is returned as `Some(0)`.
fn parse_time_param(t: &str) -> Option<u32> {
    if let Ok(secs) = t.parse::<u32>() {
        return Some(secs);
    }

    let mut total_secs: u32 = 0;
    // Value of the digit run currently being read; 0 when none is pending.
    let mut pending: u32 = 0;
    for c in t.chars() {
        if let Some(digit) = c.to_digit(10) {
            pending = pending.checked_mul(10)?.checked_add(digit)?;
        } else {
            // Unknown unit characters contribute nothing but still end the
            // current number.
            let seconds_per_unit = match c {
                'h' => 3600,
                'm' => 60,
                's' => 1,
                _ => 0,
            };
            total_secs = total_secs.checked_add(pending.checked_mul(seconds_per_unit)?)?;
            pending = 0;
        }
    }
    // A trailing number without a unit is interpreted as seconds.
    total_secs = total_secs.checked_add(pending)?;

    if total_secs > 0 {
        Some(total_secs)
    } else {
        None
    }
}
}
/// Outcome of checking a single social media URL.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SocialVerificationResult {
/// Platform the URL was attributed to.
pub platform: SocialPlatform,
/// Whether the URL could be parsed for its platform.
pub is_valid_url: bool,
/// Overall verification outcome.
pub status: VerificationStatus,
/// Heuristic score. The checks in this file start from a per-platform base
/// value and adjust it for anomalies; unparseable URLs get `0`.
/// NOTE(review): presumably a 0-100 scale; confirm with consumers.
pub confidence: u32,
/// Parsed, platform-specific URL components, or `SocialDetails::None`.
pub details: SocialDetails,
/// Human-readable observations collected during the check.
pub notes: Vec<String>,
/// Whether a person should look at this URL.
pub needs_manual_review: bool,
}
/// Outcome category of a URL check.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum VerificationStatus {
/// The URL was parsed successfully by one of the platform checks.
Verified,
/// The URL was attributed to a platform but could not be parsed.
Unverified,
/// Not assigned by any code visible in this file.
Suspicious,
/// The platform is recognised but has no automated check (used for
/// Discord and Telegram).
Unsupported,
}
/// Platform-specific URL components attached to a verification result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SocialDetails {
/// Components of a parsed Twitter/X post URL.
Twitter(TwitterPost),
/// Components of a parsed YouTube URL.
YouTube(YouTubeVideo),
/// Components of a parsed LinkedIn URL.
LinkedIn(LinkedInPost),
/// No details: the URL could not be parsed or the platform has no parser.
None,
}
/// Checks social media URLs using URL-structure heuristics.
pub struct SocialMediaVerifier {
// Not read by any method visible in this file, hence the lint allowance.
// NOTE(review): presumably reserved for API-backed checks; confirm.
#[allow(dead_code)]
http_client: Arc<reqwest::Client>,
}
impl SocialMediaVerifier {
/// Creates a verifier that owns a freshly constructed `reqwest::Client`.
#[must_use]
pub fn new() -> Self {
    let http_client = Arc::new(reqwest::Client::new());
    Self { http_client }
}
/// Creates a verifier that shares an existing HTTP client.
#[must_use]
pub fn with_client(client: Arc<reqwest::Client>) -> Self {
    let http_client = client;
    Self { http_client }
}
/// Checks a single URL and reports what could be determined from it.
///
/// The platform is detected with [`SocialPlatform::from_url`]. Twitter,
/// YouTube and LinkedIn URLs are handed to their dedicated checks. Discord and
/// Telegram URLs produce an `Unsupported` result with confidence 20 that is
/// flagged for manual review.
///
/// # Errors
///
/// Returns `AiError::VerificationFailed` when the URL belongs to no known
/// platform.
pub fn verify_url(&self, url: &str) -> Result<SocialVerificationResult> {
    let platform = SocialPlatform::from_url(url);

    // Scratch state that the per-platform checks write into.
    let mut notes = Vec::new();
    let mut needs_manual_review = false;

    match platform {
        SocialPlatform::Unknown => Err(AiError::VerificationFailed(String::from(
            "Unknown social media platform",
        ))),
        SocialPlatform::Discord | SocialPlatform::Telegram => {
            needs_manual_review = true;
            notes.push(String::from("Platform requires manual verification"));
            Ok(SocialVerificationResult {
                platform,
                is_valid_url: true,
                status: VerificationStatus::Unsupported,
                confidence: 20,
                details: SocialDetails::None,
                notes,
                needs_manual_review,
            })
        }
        SocialPlatform::Twitter => self.verify_twitter(url, &mut notes, &mut needs_manual_review),
        SocialPlatform::YouTube => self.verify_youtube(url, &mut notes, &mut needs_manual_review),
        SocialPlatform::LinkedIn => {
            self.verify_linkedin(url, &mut notes, &mut needs_manual_review)
        }
    }
}
/// Checks a Twitter/X URL using only what its structure reveals.
///
/// Confidence starts at 70 and is reduced by 20 for an unknown username, by
/// 10 for a retweet marker, and by 15 when the post ID is shorter than 10 or
/// longer than 25 characters. A parsed URL is always flagged for manual
/// review. An unparseable URL yields an `Unverified` result with confidence 0.
///
/// Observations are appended to `notes`, and the returned result carries a
/// copy of `notes` as it stands at that point.
fn verify_twitter(
    &self,
    url: &str,
    notes: &mut Vec<String>,
    needs_manual_review: &mut bool,
) -> Result<SocialVerificationResult> {
    let post = match SocialMediaParser::parse_twitter_url(url) {
        Some(post) => post,
        None => {
            notes.push(String::from("Could not parse Twitter URL"));
            return Ok(SocialVerificationResult {
                platform: SocialPlatform::Twitter,
                is_valid_url: false,
                status: VerificationStatus::Unverified,
                confidence: 0,
                details: SocialDetails::None,
                notes: notes.clone(),
                needs_manual_review: true,
            });
        }
    };

    let username_missing = post.username == "unknown";
    let id_length_unusual = !(10..=25).contains(&post.post_id.len());

    let mut confidence = 70;
    if username_missing {
        notes.push(String::from("Username could not be determined from URL"));
        confidence -= 20;
        *needs_manual_review = true;
    }
    if post.is_retweet {
        notes.push(String::from("URL appears to be a retweet"));
        confidence -= 10;
    }
    if id_length_unusual {
        notes.push(String::from("Post ID has unusual length"));
        confidence -= 15;
    }

    notes.push(format!("Tweet ID: {}", post.post_id));
    notes.push(format!("Username: @{}", post.username));
    notes.push(String::from(
        "Note: Content verification requires Twitter API access",
    ));
    // Content cannot be checked without API access, so a person always has to
    // look at the post.
    *needs_manual_review = true;

    Ok(SocialVerificationResult {
        platform: SocialPlatform::Twitter,
        is_valid_url: true,
        status: VerificationStatus::Verified,
        confidence,
        details: SocialDetails::Twitter(post),
        notes: notes.clone(),
        needs_manual_review: *needs_manual_review,
    })
}
/// Checks a YouTube URL using only what its structure reveals.
///
/// Confidence starts at 80 and is reduced by 20 when the video ID is not
/// exactly 11 bytes long and by 30 when it contains anything other than ASCII
/// letters, ASCII digits, `-` or `_`. The character check is deliberately
/// ASCII-only: a Unicode-aware check would accept letters such as `é`.
///
/// Observations (ID, channel, video type, start time, playlist) are appended
/// to `notes`, and the returned result carries a copy of `notes` as it stands
/// at that point. `needs_manual_review` is passed through unchanged for a
/// parsed URL. An unparseable URL yields an `Unverified` result with
/// confidence 0 that is flagged for manual review.
fn verify_youtube(
    &self,
    url: &str,
    notes: &mut Vec<String>,
    needs_manual_review: &mut bool,
) -> Result<SocialVerificationResult> {
    let parsed = SocialMediaParser::parse_youtube_url(url);
    if let Some(video) = parsed {
        let mut confidence = 80;
        if video.video_id.len() != 11 {
            notes.push(format!(
                "Video ID has unusual length: {} (expected 11)",
                video.video_id.len()
            ));
            confidence -= 20;
        }
        let valid_chars = video
            .video_id
            .bytes()
            .all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_');
        if !valid_chars {
            notes.push("Video ID contains invalid characters".to_string());
            confidence -= 30;
        }
        notes.push(format!("Video ID: {}", video.video_id));
        if let Some(ref channel) = video.channel {
            notes.push(format!("Channel: {channel}"));
        }
        if video.is_shorts {
            notes.push("Video type: YouTube Shorts".to_string());
        }
        if video.is_live {
            notes.push("Video type: Live stream".to_string());
        }
        if let Some(t) = video.start_time {
            notes.push(format!("Start time: {t}s"));
        }
        if let Some(ref playlist) = video.playlist_id {
            notes.push(format!("Playlist: {playlist}"));
        }
        notes.push("Note: Content verification requires YouTube API access".to_string());
        Ok(SocialVerificationResult {
            platform: SocialPlatform::YouTube,
            is_valid_url: true,
            status: VerificationStatus::Verified,
            confidence,
            details: SocialDetails::YouTube(video),
            notes: notes.clone(),
            needs_manual_review: *needs_manual_review,
        })
    } else {
        notes.push("Could not parse YouTube URL".to_string());
        Ok(SocialVerificationResult {
            platform: SocialPlatform::YouTube,
            is_valid_url: false,
            status: VerificationStatus::Unverified,
            confidence: 0,
            details: SocialDetails::None,
            notes: notes.clone(),
            needs_manual_review: true,
        })
    }
}
/// Checks a LinkedIn URL using only what its structure reveals.
///
/// Confidence depends on the detected post type: 60 for posts and company
/// posts, 70 for articles, 50 when the type is unknown. A parsed URL is always
/// flagged for manual review. An unparseable URL yields an `Unverified` result
/// with confidence 0.
///
/// Observations are appended to `notes`, and the returned result carries a
/// copy of `notes` as it stands at that point.
fn verify_linkedin(
    &self,
    url: &str,
    notes: &mut Vec<String>,
    needs_manual_review: &mut bool,
) -> Result<SocialVerificationResult> {
    let post = match SocialMediaParser::parse_linkedin_url(url) {
        Some(post) => post,
        None => {
            notes.push(String::from("Could not parse LinkedIn URL"));
            return Ok(SocialVerificationResult {
                platform: SocialPlatform::LinkedIn,
                is_valid_url: false,
                status: VerificationStatus::Unverified,
                confidence: 0,
                details: SocialDetails::None,
                notes: notes.clone(),
                needs_manual_review: true,
            });
        }
    };

    // Each post type maps to one note and one confidence level.
    let (type_note, confidence) = match post.post_type {
        LinkedInPostType::Post => ("Post type: LinkedIn post/update", 60),
        LinkedInPostType::Article => ("Post type: LinkedIn article", 70),
        LinkedInPostType::CompanyPost => ("Post type: Company page post", 60),
        LinkedInPostType::Unknown => ("Could not determine post type", 50),
    };
    notes.push(String::from(type_note));

    if let Some(activity) = &post.activity_id {
        notes.push(format!("Activity ID: {activity}"));
    }
    if let Some(profile) = &post.profile {
        notes.push(format!("Profile/Company: {profile}"));
    }
    notes.push(String::from(
        "Note: LinkedIn requires login for full verification",
    ));
    *needs_manual_review = true;

    Ok(SocialVerificationResult {
        platform: SocialPlatform::LinkedIn,
        is_valid_url: true,
        status: VerificationStatus::Verified,
        confidence,
        details: SocialDetails::LinkedIn(post),
        notes: notes.clone(),
        needs_manual_review: *needs_manual_review,
    })
}
/// Checks every URL in `urls` and returns one result per input, in input
/// order. A failure for one URL does not affect the others.
pub fn verify_urls(&self, urls: &[&str]) -> Vec<Result<SocialVerificationResult>> {
    urls.iter().map(|url| self.verify_url(url)).collect()
}
/// Aggregates a batch of verification results into a [`VerificationSummary`].
///
/// Counts the results whose status is `Verified`, those flagged for manual
/// review, and the number of results per platform. The average confidence is
/// the integer mean (rounded down), or `0` for an empty slice.
#[must_use]
pub fn summarize_results(results: &[SocialVerificationResult]) -> VerificationSummary {
    let mut verified_count = 0;
    let mut review_count = 0;
    let mut confidence_sum: u64 = 0;
    let mut platforms_found = std::collections::HashMap::new();

    for entry in results {
        if entry.status == VerificationStatus::Verified {
            verified_count += 1;
        }
        if entry.needs_manual_review {
            review_count += 1;
        }
        confidence_sum += u64::from(entry.confidence);
        *platforms_found.entry(entry.platform).or_insert(0) += 1;
    }

    let mean_confidence = match results.len() {
        0 => 0,
        count => confidence_sum / count as u64,
    };

    VerificationSummary {
        total_urls: results.len(),
        verified_count,
        needs_manual_review: review_count,
        // The mean of `u32` values always fits in `u32`.
        average_confidence: mean_confidence as u32,
        platforms_found,
    }
}
}
impl Default for SocialMediaVerifier {
fn default() -> Self {
Self::new()
}
}
/// Aggregate statistics over a batch of verification results, as produced by
/// `SocialMediaVerifier::summarize_results`.
#[derive(Debug, Clone, Serialize)]
pub struct VerificationSummary {
/// Number of results that were summarised.
pub total_urls: usize,
/// Number of results whose status is `Verified`.
pub verified_count: usize,
/// Number of results flagged for manual review.
pub needs_manual_review: usize,
/// Integer mean of the results' confidence values (rounded down); `0` when
/// there are no results.
pub average_confidence: u32,
/// Number of results per platform.
pub platforms_found: std::collections::HashMap<SocialPlatform, usize>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A classic `twitter.com` status link yields username and post ID.
    #[test]
    fn test_twitter_url_parsing() {
        let post = SocialMediaParser::parse_twitter_url(
            "https://twitter.com/elonmusk/status/1234567890123456789",
        )
        .unwrap();
        assert_eq!(post.post_id, "1234567890123456789");
        assert_eq!(post.username, "elonmusk");
        assert!(!post.is_retweet);
    }

    /// `x.com` links are parsed too, and the query string is not part of the ID.
    #[test]
    fn test_twitter_x_url_parsing() {
        let post =
            SocialMediaParser::parse_twitter_url("https://x.com/username/status/9876543210?s=20")
                .unwrap();
        assert_eq!(post.post_id, "9876543210");
        assert_eq!(post.username, "username");
    }

    /// Watch links expose the ID via `v` and the start offset via `t`.
    #[test]
    fn test_youtube_watch_url() {
        let video = SocialMediaParser::parse_youtube_url(
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=30s",
        )
        .unwrap();
        assert_eq!(video.video_id, "dQw4w9WgXcQ");
        assert_eq!(video.start_time, Some(30));
        assert!(!video.is_shorts);
    }

    /// `youtu.be` short links carry the ID in the path.
    #[test]
    fn test_youtube_short_url() {
        let video = SocialMediaParser::parse_youtube_url("https://youtu.be/dQw4w9WgXcQ").unwrap();
        assert_eq!(video.video_id, "dQw4w9WgXcQ");
    }

    /// Shorts links carry the ID after `/shorts/` and set the shorts flag.
    #[test]
    fn test_youtube_shorts_url() {
        let video =
            SocialMediaParser::parse_youtube_url("https://www.youtube.com/shorts/abc12345678")
                .unwrap();
        assert!(video.is_shorts);
        assert_eq!(video.video_id, "abc12345678");
    }

    /// Post links are classified as posts and yield an activity ID.
    #[test]
    fn test_linkedin_post_url() {
        let post = SocialMediaParser::parse_linkedin_url(
            "https://www.linkedin.com/posts/johndoe_activity-1234567890_test",
        )
        .unwrap();
        assert!(post.activity_id.is_some());
        assert_eq!(post.post_type, LinkedInPostType::Post);
    }

    /// Each sample URL is attributed to the expected platform.
    #[test]
    fn test_platform_detection() {
        let cases = [
            ("https://twitter.com/user/status/123", SocialPlatform::Twitter),
            ("https://x.com/user/status/123", SocialPlatform::Twitter),
            ("https://youtube.com/watch?v=abc", SocialPlatform::YouTube),
            ("https://youtu.be/abc", SocialPlatform::YouTube),
            ("https://linkedin.com/in/user", SocialPlatform::LinkedIn),
        ];
        for &(url, expected) in cases.iter() {
            assert_eq!(SocialPlatform::from_url(url), expected);
        }
    }

    /// Plain seconds and `h`/`m`/`s` combinations convert to total seconds.
    #[test]
    fn test_time_param_parsing() {
        let cases = [
            ("30", Some(30)),
            ("1m30s", Some(90)),
            ("1h30m", Some(5400)),
            ("2h15m30s", Some(8130)),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(SocialMediaParser::parse_time_param(input), expected);
        }
    }
}