use chrono::{DateTime, TimeZone, Utc};
use serde::Deserialize;
use crate::Message;
/// One raw message record, in the shape it is deserialized from the export data.
///
/// The parsing functions visible alongside this type read `sender_name`,
/// `timestamp_ms`, `content` and `share`; the media lists are deserialized but
/// are not consulted by them.
#[derive(Debug, Deserialize)]
pub struct InstagramRawMessage {
    /// Sender name as stored in the export (may be mojibake; see
    /// `fix_mojibake_encoding`).
    pub sender_name: String,
    /// Send time as a millisecond count, interpreted by `parse_ms_timestamp`
    /// as milliseconds since the Unix epoch in UTC.
    pub timestamp_ms: i64,
    /// Text body of the message, when there is one.
    pub content: Option<String>,
    /// Shared item attached to the message; its `share_text` is used as the
    /// body when `content` is `None`.
    pub share: Option<InstagramShare>,
    /// Photo attachments; `None` when the key is absent.
    #[serde(default)]
    pub photos: Option<Vec<InstagramMedia>>,
    /// Video attachments; `None` when the key is absent.
    #[serde(default)]
    pub videos: Option<Vec<InstagramMedia>>,
    /// Audio attachments; `None` when the key is absent.
    #[serde(default)]
    pub audio_files: Option<Vec<InstagramMedia>>,
}
/// Shared item attached to a message.
#[derive(Debug, Deserialize)]
pub struct InstagramShare {
    /// Text accompanying the share; used as the message body when the message
    /// itself has no `content`.
    pub share_text: Option<String>,
    /// Link of the shared item, if present. Not read by the parsing functions
    /// visible in this file.
    pub link: Option<String>,
}
/// A media attachment entry (used for photos, videos and audio files).
#[derive(Debug, Deserialize)]
pub struct InstagramMedia {
    /// Location of the media item, if present.
    /// NOTE(review): presumably a path relative to the export root — confirm.
    pub uri: Option<String>,
}
/// Top-level shape of a deserialized export: a list of raw messages.
#[derive(Debug, Deserialize)]
pub struct InstagramExport {
    /// Every message record in the export, in the order it was deserialized.
    pub messages: Vec<InstagramRawMessage>,
}
/// Repairs "mojibake": UTF-8 text whose bytes were each decoded as a separate
/// character (Latin-1 style), e.g. `"Ã©"` instead of `"é"`.
///
/// Each character is mapped back to the single byte it stands for and the
/// resulting byte sequence is re-decoded as UTF-8.
///
/// The input is returned unchanged when:
/// - it is pure ASCII (nothing to repair),
/// - it contains any character above U+00FF (such a character cannot be a
///   mis-decoded single byte, so the text is not this kind of mojibake), or
/// - the reconstructed bytes are not valid UTF-8.
pub fn fix_mojibake_encoding(s: &str) -> String {
    if s.is_ascii() {
        return s.to_string();
    }

    // Map every char to its byte value, bailing out on anything above 0xFF.
    // A plain `c as u8` would silently truncate such chars, and the truncated
    // bytes can happen to form valid UTF-8, corrupting text that was fine.
    let bytes: Option<Vec<u8>> = s
        .chars()
        .map(|c| match u32::from(c) {
            // Lossless: the pattern guarantees the value fits in one byte.
            code @ 0..=0xFF => Some(code as u8),
            _ => None,
        })
        .collect();

    bytes
        .and_then(|raw| String::from_utf8(raw).ok())
        .unwrap_or_else(|| s.to_string())
}
/// Converts a millisecond Unix timestamp into a UTC `DateTime`.
///
/// Returns `None` when chrono does not resolve the value to exactly one
/// instant (for example, when the millisecond count is out of range).
pub fn parse_ms_timestamp(timestamp_ms: i64) -> Option<DateTime<Utc>> {
    let resolved = Utc.timestamp_millis_opt(timestamp_ms);
    resolved.single()
}
pub fn parse_instagram_message_owned(
msg: InstagramRawMessage,
fix_encoding: bool,
) -> Option<Message> {
let content = msg.content.or_else(|| msg.share.and_then(|s| s.share_text));
let content = content.map(|c| {
if fix_encoding && !c.is_ascii() {
fix_mojibake_encoding(&c)
} else {
c }
});
let content = match content {
Some(c) if !c.trim().is_empty() => c,
_ => return None,
};
let timestamp = parse_ms_timestamp(msg.timestamp_ms);
let sender = if fix_encoding && !msg.sender_name.is_ascii() {
fix_mojibake_encoding(&msg.sender_name)
} else {
msg.sender_name };
Some(Message::with_metadata(
sender, content, timestamp, None, None, None, ))
}
/// Converts a borrowed raw export message into a `Message`, copying the
/// strings it needs.
///
/// The body is taken from `content`; only when `content` is `None` does it
/// fall back to the share's `share_text`. When `fix_encoding` is set, non-ASCII
/// body and sender strings are passed through `fix_mojibake_encoding`.
///
/// Returns `None` when there is no body text, or when the body (after the
/// optional encoding repair) is empty or whitespace-only. The timestamp is
/// passed on as an `Option`, so an unparseable timestamp does not reject the
/// message.
pub fn parse_instagram_message(msg: &InstagramRawMessage, fix_encoding: bool) -> Option<Message> {
    // Shared step producing an owned, optionally repaired copy of a string.
    let repair = |text: &str| -> String {
        if fix_encoding && !text.is_ascii() {
            fix_mojibake_encoding(text)
        } else {
            text.to_owned()
        }
    };

    let raw_body: &str = match msg.content.as_deref() {
        Some(text) => text,
        None => msg.share.as_ref()?.share_text.as_deref()?,
    };

    // Repair first, then test for emptiness: the repaired text is what counts.
    let body = repair(raw_body);
    if body.trim().is_empty() {
        return None;
    }

    let sender = repair(&msg.sender_name);
    let sent_at = parse_ms_timestamp(msg.timestamp_ms);

    Some(Message::with_metadata(
        sender, body, sent_at, None, None, None,
    ))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Timestamp used by every fixture in this module.
    const SAMPLE_TIMESTAMP_MS: i64 = 1705315800000;

    /// Builds a raw message without media attachments.
    fn raw_message(
        sender: &str,
        content: Option<&str>,
        share: Option<InstagramShare>,
    ) -> InstagramRawMessage {
        InstagramRawMessage {
            sender_name: String::from(sender),
            timestamp_ms: SAMPLE_TIMESTAMP_MS,
            content: content.map(String::from),
            share,
            photos: None,
            videos: None,
            audio_files: None,
        }
    }

    /// ASCII input must come back unchanged.
    #[test]
    fn test_fix_encoding_ascii() {
        for text in ["Hello", "Test 123"] {
            assert_eq!(fix_mojibake_encoding(text), text);
        }
    }

    /// A realistic millisecond timestamp resolves to a date.
    #[test]
    fn test_parse_ms_timestamp() {
        assert!(parse_ms_timestamp(SAMPLE_TIMESTAMP_MS).is_some());
    }

    /// Plain text content is carried over together with the sender.
    #[test]
    fn test_parse_instagram_message_basic() {
        let msg = raw_message("user_one", Some("Hello!"), None);

        let parsed = parse_instagram_message(&msg, false)
            .expect("message with content should be parsed");

        assert_eq!(parsed.sender, "user_one");
        assert_eq!(parsed.content, "Hello!");
    }

    /// Without content, the share text becomes the message body.
    #[test]
    fn test_parse_instagram_message_with_share() {
        let share = InstagramShare {
            share_text: Some("Check this out!".to_string()),
            link: Some("https://example.com".to_string()),
        };
        let msg = raw_message("user", None, Some(share));

        let parsed = parse_instagram_message(&msg, false)
            .expect("message with share text should be parsed");

        assert_eq!(parsed.content, "Check this out!");
    }

    /// A message with neither content nor share yields nothing.
    #[test]
    fn test_parse_instagram_message_empty() {
        let msg = raw_message("user", None, None);

        assert!(parse_instagram_message(&msg, false).is_none());
    }
}