use std::path::Path;
use serde::{Deserialize, Serialize};
use crate::Message;
use crate::error::ChatpackError;
#[cfg(feature = "streaming")]
use crate::streaming::MessageIterator;
/// Chat platforms known to this module.
///
/// With serde, each variant is written as its lowercase name (for example
/// `"telegram"`); a two-letter alias is additionally accepted when
/// deserializing. The enum is `#[non_exhaustive]`, so code outside this crate
/// must include a wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum Platform {
/// Telegram; also deserializes from the alias `"tg"`.
#[serde(alias = "tg")]
Telegram,
/// WhatsApp; also deserializes from the alias `"wa"`.
#[serde(alias = "wa")]
WhatsApp,
/// Instagram; also deserializes from the alias `"ig"`.
#[serde(alias = "ig")]
Instagram,
/// Discord; also deserializes from the alias `"dc"`.
#[serde(alias = "dc")]
Discord,
}
impl Platform {
    /// Returns the file extension (without a leading dot) associated with
    /// this platform: `"txt"` for WhatsApp, `"json"` for every other variant.
    pub fn default_extension(&self) -> &'static str {
        // Kept as an exhaustive match so a newly added variant must be
        // classified here explicitly.
        match *self {
            Self::Telegram | Self::Instagram | Self::Discord => "json",
            Self::WhatsApp => "txt",
        }
    }

    /// Returns every platform name and short alias, full name first and
    /// alias second for each platform.
    pub fn all_names() -> &'static [&'static str] {
        const NAMES: &[&str] = &[
            "telegram",
            "tg",
            "whatsapp",
            "wa",
            "instagram",
            "ig",
            "discord",
            "dc",
        ];
        NAMES
    }

    /// Returns every platform variant in declaration order.
    pub fn all() -> &'static [Platform] {
        const VARIANTS: &[Platform] = &[
            Platform::Telegram,
            Platform::WhatsApp,
            Platform::Instagram,
            Platform::Discord,
        ];
        VARIANTS
    }
}
/// Human-readable platform name in its branded capitalisation
/// (for example `WhatsApp`).
impl std::fmt::Display for Platform {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            Platform::Telegram => "Telegram",
            Platform::WhatsApp => "WhatsApp",
            Platform::Instagram => "Instagram",
            Platform::Discord => "Discord",
        };
        // `write_str` (not `pad`) so width/alignment flags keep being ignored.
        f.write_str(label)
    }
}
impl std::str::FromStr for Platform {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"telegram" | "tg" => Ok(Platform::Telegram),
"whatsapp" | "wa" => Ok(Platform::WhatsApp),
"instagram" | "ig" => Ok(Platform::Instagram),
"discord" | "dc" => Ok(Platform::Discord),
_ => Err(format!(
"Unknown platform: '{}'. Expected one of: {}",
s,
Platform::all_names().join(", ")
)),
}
}
}
/// Owning wrapper around a boxed [`MessageIterator`].
///
/// Only compiled when the `streaming` feature is enabled.
#[cfg(feature = "streaming")]
pub struct ParseIterator {
// The wrapped iterator; every method on this type forwards to it.
inner: Box<dyn MessageIterator>,
}
#[cfg(feature = "streaming")]
impl ParseIterator {
/// Wraps `inner`, taking ownership of the boxed iterator.
pub fn new(inner: Box<dyn MessageIterator>) -> Self {
Self { inner }
}
/// Forwards to the inner iterator's `progress()`.
///
/// NOTE(review): the scale of the returned value and the conditions under
/// which it is `None` are defined by `MessageIterator` — confirm there.
pub fn progress(&self) -> Option<f64> {
self.inner.progress()
}
/// Forwards to the inner iterator's `bytes_processed()`.
pub fn bytes_processed(&self) -> u64 {
self.inner.bytes_processed()
}
/// Forwards to the inner iterator's `total_bytes()`.
pub fn total_bytes(&self) -> Option<u64> {
self.inner.total_bytes()
}
}
/// Yields the inner iterator's items, converting each error into a
/// [`ChatpackError`] via `From`.
#[cfg(feature = "streaming")]
impl Iterator for ParseIterator {
    type Item = Result<Message, ChatpackError>;

    fn next(&mut self) -> Option<Self::Item> {
        // `?` ends iteration as soon as the inner iterator is exhausted.
        let item = self.inner.next()?;
        Some(item.map_err(ChatpackError::from))
    }
}
/// Common interface implemented by every platform-specific chat parser.
///
/// Implementors supply [`name`](Parser::name), [`platform`](Parser::platform),
/// [`parse`](Parser::parse) and [`parse_str`](Parser::parse_str); the
/// remaining methods have defaults built on top of `parse`.
pub trait Parser: Send + Sync {
    /// Name of this parser.
    fn name(&self) -> &'static str;

    /// Platform this parser handles.
    fn platform(&self) -> Platform;

    /// Parses the export at `path` into a vector of messages.
    fn parse(&self, path: &Path) -> Result<Vec<Message>, ChatpackError>;

    /// Parses export content that is already in memory.
    fn parse_str(&self, content: &str) -> Result<Vec<Message>, ChatpackError>;

    /// Same as [`parse`](Parser::parse), taking the path as a string slice.
    fn parse_file(&self, path: &str) -> Result<Vec<Message>, ChatpackError> {
        let path: &Path = path.as_ref();
        self.parse(path)
    }

    /// Returns an iterator over the messages of the export at `path`.
    ///
    /// The default implementation is not incremental: it runs
    /// [`parse`](Parser::parse) to completion and then iterates the resulting
    /// vector, so any parse error is returned up front and every yielded
    /// item is `Ok`.
    fn stream(
        &self,
        path: &Path,
    ) -> Result<Box<dyn Iterator<Item = Result<Message, ChatpackError>> + Send>, ChatpackError>
    {
        let parsed = self.parse(path)?;
        let items = parsed.into_iter().map(Ok::<Message, ChatpackError>);
        Ok(Box::new(items))
    }

    /// Same as [`stream`](Parser::stream), taking the path as a string slice.
    fn stream_file(
        &self,
        path: &str,
    ) -> Result<Box<dyn Iterator<Item = Result<Message, ChatpackError>> + Send>, ChatpackError>
    {
        let path: &Path = path.as_ref();
        self.stream(path)
    }

    /// Whether this parser supports streaming; `false` unless overridden.
    fn supports_streaming(&self) -> bool {
        false
    }

    /// Suggested buffer size in bytes; 64 KiB unless overridden.
    fn recommended_buffer_size(&self) -> usize {
        const KIB: usize = 1024;
        64 * KIB
    }
}
/// Builds the parser for `platform` using each parser type's `new()`
/// constructor.
///
/// # Panics
///
/// Panics when the Cargo feature for the requested platform is not enabled,
/// because the corresponding match arm is compiled out.
pub fn create_parser(platform: Platform) -> Box<dyn Parser> {
    match platform {
        #[cfg(feature = "telegram")]
        Platform::Telegram => {
            let parser = crate::parsers::TelegramParser::new();
            Box::new(parser)
        }
        #[cfg(feature = "whatsapp")]
        Platform::WhatsApp => {
            let parser = crate::parsers::WhatsAppParser::new();
            Box::new(parser)
        }
        #[cfg(feature = "instagram")]
        Platform::Instagram => {
            let parser = crate::parsers::InstagramParser::new();
            Box::new(parser)
        }
        #[cfg(feature = "discord")]
        Platform::Discord => {
            let parser = crate::parsers::DiscordParser::new();
            Box::new(parser)
        }
        // With every parser feature enabled the arms above are exhaustive and
        // this one can never match, hence the lint allowance.
        #[allow(unreachable_patterns)]
        _ => panic!(
            "Parser for {:?} is not enabled. Enable the corresponding feature.",
            platform
        ),
    }
}
/// Builds the parser for `platform` using each parser type's
/// `with_streaming()` constructor.
///
/// # Panics
///
/// Panics when the Cargo feature for the requested platform is not enabled,
/// because the corresponding match arm is compiled out.
pub fn create_streaming_parser(platform: Platform) -> Box<dyn Parser> {
    match platform {
        #[cfg(feature = "telegram")]
        Platform::Telegram => {
            let parser = crate::parsers::TelegramParser::with_streaming();
            Box::new(parser)
        }
        #[cfg(feature = "whatsapp")]
        Platform::WhatsApp => {
            let parser = crate::parsers::WhatsAppParser::with_streaming();
            Box::new(parser)
        }
        #[cfg(feature = "instagram")]
        Platform::Instagram => {
            let parser = crate::parsers::InstagramParser::with_streaming();
            Box::new(parser)
        }
        #[cfg(feature = "discord")]
        Platform::Discord => {
            let parser = crate::parsers::DiscordParser::with_streaming();
            Box::new(parser)
        }
        // With every parser feature enabled the arms above are exhaustive and
        // this one can never match, hence the lint allowance.
        #[allow(unreachable_patterns)]
        _ => panic!(
            "Streaming parser for {:?} is not enabled. Enable the corresponding feature.",
            platform
        ),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    /// Single-line Telegram export containing one message from "Bob".
    #[cfg(feature = "telegram")]
    const COMPACT_EXPORT: &str = r#"{"messages": [{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Bob", "text": "Hi"}]}"#;

    /// Writes `contents` to `test.json` inside a fresh temporary directory.
    ///
    /// The returned `TempDir` guard must be kept alive for as long as the
    /// file is needed; dropping it deletes the directory.
    #[cfg(feature = "telegram")]
    fn write_export(contents: &str) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::tempdir().expect("create temp dir");
        let file_path = dir.path().join("test.json");
        std::fs::write(&file_path, contents).expect("write");
        (dir, file_path)
    }

    /// Builds a Telegram export with one message from `sender`, laid out with
    /// one JSON element per line and a trailing newline.
    #[cfg(all(feature = "telegram", feature = "streaming"))]
    fn multiline_export(sender: &str) -> String {
        let message_line = format!(
            r#" {{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "{}", "text": "Hello"}}"#,
            sender
        );
        // The trailing empty element yields the final newline after `}`.
        [
            "{",
            r#" "messages": ["#,
            message_line.as_str(),
            " ]",
            "}",
            "",
        ]
        .join("\n")
    }

    /// Full names and aliases parse to the expected variant.
    #[test]
    fn test_platform_from_str() {
        let cases = [
            ("telegram", Platform::Telegram),
            ("tg", Platform::Telegram),
            ("TELEGRAM", Platform::Telegram),
            ("whatsapp", Platform::WhatsApp),
            ("wa", Platform::WhatsApp),
            ("instagram", Platform::Instagram),
            ("ig", Platform::Instagram),
            ("discord", Platform::Discord),
            ("dc", Platform::Discord),
        ];
        for &(input, expected) in &cases {
            assert_eq!(
                Platform::from_str(input),
                Ok(expected),
                "input: {:?}",
                input
            );
        }
    }

    /// Parsing ignores letter case for both names and aliases.
    #[test]
    fn test_platform_from_str_case_insensitive() {
        let cases = [
            ("TeLegRaM", Platform::Telegram),
            ("TG", Platform::Telegram),
            ("WhAtSaPp", Platform::WhatsApp),
            ("WA", Platform::WhatsApp),
            ("InStAgRaM", Platform::Instagram),
            ("IG", Platform::Instagram),
            ("DiScOrD", Platform::Discord),
            ("DC", Platform::Discord),
        ];
        for &(input, expected) in &cases {
            assert_eq!(
                Platform::from_str(input),
                Ok(expected),
                "input: {:?}",
                input
            );
        }
    }

    /// Unknown, empty and near-miss inputs are rejected with a descriptive
    /// message.
    #[test]
    fn test_platform_from_str_error() {
        let err = Platform::from_str("unknown").unwrap_err();
        assert!(err.contains("Unknown platform"));
        assert!(err.contains("unknown"));

        let err = Platform::from_str("").unwrap_err();
        assert!(err.contains("Unknown platform"));

        let err = Platform::from_str("telegramx").unwrap_err();
        assert!(err.contains("Unknown platform"));
    }

    #[test]
    fn test_platform_display() {
        assert_eq!(Platform::Telegram.to_string(), "Telegram");
        assert_eq!(Platform::WhatsApp.to_string(), "WhatsApp");
        assert_eq!(Platform::Instagram.to_string(), "Instagram");
        assert_eq!(Platform::Discord.to_string(), "Discord");
    }

    #[test]
    fn test_platform_default_extension() {
        assert_eq!(Platform::Telegram.default_extension(), "json");
        assert_eq!(Platform::WhatsApp.default_extension(), "txt");
        assert_eq!(Platform::Instagram.default_extension(), "json");
        assert_eq!(Platform::Discord.default_extension(), "json");
    }

    #[test]
    fn test_platform_all() {
        let all = Platform::all();
        assert_eq!(all.len(), 4);
        assert!(all.contains(&Platform::Telegram));
        assert!(all.contains(&Platform::WhatsApp));
        assert!(all.contains(&Platform::Instagram));
        assert!(all.contains(&Platform::Discord));
    }

    #[test]
    fn test_platform_all_names() {
        let names = Platform::all_names();
        for expected in &[
            "telegram",
            "tg",
            "whatsapp",
            "wa",
            "instagram",
            "ig",
            "discord",
            "dc",
        ] {
            assert!(names.contains(expected), "missing name: {}", expected);
        }
    }

    /// Serialization uses the lowercase name; deserialization also accepts
    /// the short aliases.
    #[test]
    fn test_platform_serde() {
        let platform = Platform::Telegram;
        let json = serde_json::to_string(&platform).expect("serialize failed");
        assert_eq!(json, "\"telegram\"");

        let parsed: Platform = serde_json::from_str("\"telegram\"").expect("deserialize failed");
        assert_eq!(parsed, Platform::Telegram);

        let parsed: Platform = serde_json::from_str("\"tg\"").expect("deserialize failed");
        assert_eq!(parsed, Platform::Telegram);

        let parsed: Platform = serde_json::from_str("\"wa\"").expect("deserialize failed");
        assert_eq!(parsed, Platform::WhatsApp);
    }

    /// Every variant survives a serialize/deserialize round trip.
    #[test]
    fn test_platform_serde_all_variants() {
        for platform in Platform::all() {
            let json = serde_json::to_string(platform).expect("serialize failed");
            let parsed: Platform = serde_json::from_str(&json).expect("deserialize failed");
            assert_eq!(parsed, *platform);
        }
    }

    /// Exercises both the `Copy` and the `Clone` implementation.
    #[test]
    fn test_platform_clone_copy() {
        let original = Platform::Telegram;
        // Copy: `original` must remain usable after this binding.
        let copied = original;
        // Calling `clone()` explicitly is the point of this test.
        #[allow(clippy::clone_on_copy)]
        let cloned = original.clone();
        assert_eq!(original, copied);
        assert_eq!(original, cloned);
    }

    #[test]
    fn test_platform_debug() {
        let debug = format!("{:?}", Platform::Telegram);
        assert!(debug.contains("Telegram"));
    }

    /// Equal variants hash to the same bucket, so duplicates collapse in a set.
    #[test]
    fn test_platform_eq_hash() {
        use std::collections::HashSet;

        let mut set = HashSet::new();
        set.insert(Platform::Telegram);
        set.insert(Platform::WhatsApp);
        set.insert(Platform::Telegram);

        assert_eq!(set.len(), 2);
        assert!(set.contains(&Platform::Telegram));
        assert!(set.contains(&Platform::WhatsApp));
    }

    #[cfg(feature = "telegram")]
    #[test]
    fn test_create_parser_telegram() {
        let parser = create_parser(Platform::Telegram);
        assert_eq!(parser.name(), "Telegram");
        assert_eq!(parser.platform(), Platform::Telegram);
        assert!(!parser.supports_streaming());
    }

    #[cfg(feature = "whatsapp")]
    #[test]
    fn test_create_parser_whatsapp() {
        let parser = create_parser(Platform::WhatsApp);
        assert_eq!(parser.name(), "WhatsApp");
        assert_eq!(parser.platform(), Platform::WhatsApp);
    }

    #[cfg(feature = "instagram")]
    #[test]
    fn test_create_parser_instagram() {
        let parser = create_parser(Platform::Instagram);
        assert_eq!(parser.name(), "Instagram");
        assert_eq!(parser.platform(), Platform::Instagram);
    }

    #[cfg(feature = "discord")]
    #[test]
    fn test_create_parser_discord() {
        let parser = create_parser(Platform::Discord);
        assert_eq!(parser.name(), "Discord");
        assert_eq!(parser.platform(), Platform::Discord);
    }

    #[cfg(feature = "telegram")]
    #[test]
    fn test_create_streaming_parser_telegram() {
        let parser = create_streaming_parser(Platform::Telegram);
        assert_eq!(parser.name(), "Telegram");
        assert!(parser.supports_streaming());
        assert!(parser.recommended_buffer_size() >= 64 * 1024);
    }

    #[cfg(feature = "whatsapp")]
    #[test]
    fn test_create_streaming_parser_whatsapp() {
        let parser = create_streaming_parser(Platform::WhatsApp);
        assert_eq!(parser.name(), "WhatsApp");
        assert!(parser.supports_streaming());
    }

    #[cfg(feature = "instagram")]
    #[test]
    fn test_create_streaming_parser_instagram() {
        let parser = create_streaming_parser(Platform::Instagram);
        assert_eq!(parser.name(), "Instagram");
        assert!(parser.supports_streaming());
    }

    #[cfg(feature = "discord")]
    #[test]
    fn test_create_streaming_parser_discord() {
        let parser = create_streaming_parser(Platform::Discord);
        assert_eq!(parser.name(), "Discord");
        assert!(parser.supports_streaming());
    }

    #[cfg(feature = "telegram")]
    #[test]
    fn test_parser_parse_str() {
        let parser = create_parser(Platform::Telegram);
        let json = r#"{"messages": [{"id": 1, "type": "message", "date_unixtime": "1234567890", "from": "Alice", "text": "Hello"}]}"#;
        let messages = parser.parse_str(json).expect("parse failed");
        assert_eq!(messages.len(), 1);
        assert_eq!(messages[0].sender, "Alice");
        assert_eq!(messages[0].content, "Hello");
    }

    #[cfg(feature = "telegram")]
    #[test]
    fn test_parser_parse_file() {
        let (_dir, file_path) = write_export(COMPACT_EXPORT);

        let parser = create_parser(Platform::Telegram);
        let messages = parser
            .parse_file(file_path.to_str().unwrap())
            .expect("parse failed");
        assert_eq!(messages.len(), 1);
        assert_eq!(messages[0].sender, "Bob");
    }

    #[cfg(all(feature = "telegram", feature = "streaming"))]
    #[test]
    fn test_parser_stream_file() {
        let (_dir, file_path) = write_export(&multiline_export("Charlie"));

        let parser = create_streaming_parser(Platform::Telegram);
        let iter = parser
            .stream_file(file_path.to_str().unwrap())
            .expect("stream failed");
        // Collect into a `Result` so an item-level error fails the test
        // instead of being silently dropped.
        let messages = iter
            .collect::<Result<Vec<_>, _>>()
            .expect("stream item failed");
        assert_eq!(messages.len(), 1);
        assert_eq!(messages[0].sender, "Charlie");
    }

    #[cfg(feature = "telegram")]
    #[test]
    fn test_parser_default_supports_streaming() {
        let parser = create_parser(Platform::Telegram);
        assert!(!parser.supports_streaming());
    }

    #[cfg(feature = "telegram")]
    #[test]
    fn test_parser_default_recommended_buffer_size() {
        let parser = create_parser(Platform::Telegram);
        assert!(parser.recommended_buffer_size() >= 64 * 1024);
    }

    #[cfg(all(feature = "telegram", feature = "streaming"))]
    #[test]
    fn test_parse_iterator_wrapper() {
        use crate::streaming::{StreamingParser, TelegramStreamingParser};

        let (_dir, file_path) = write_export(&multiline_export("Alice"));

        let streaming_parser = TelegramStreamingParser::new();
        let inner = streaming_parser
            .stream(file_path.to_str().unwrap())
            .expect("stream failed");
        let mut parse_iter = ParseIterator::new(inner);

        // Whether progress is reported is up to the inner iterator; when it
        // is, the value must at least be a real number.
        if let Some(progress) = parse_iter.progress() {
            assert!(progress.is_finite(), "progress was {}", progress);
        }
        assert!(parse_iter.total_bytes().is_some());

        let msg = parse_iter.next().unwrap().expect("should parse");
        assert_eq!(msg.sender, "Alice");
        assert!(parse_iter.next().is_none());
    }

    /// The non-streaming parser still serves `stream()` through the trait's
    /// default implementation.
    #[cfg(feature = "telegram")]
    #[test]
    fn test_parser_stream_default_impl() {
        let (_dir, file_path) = write_export(COMPACT_EXPORT);

        let parser = create_parser(Platform::Telegram);
        let iter = parser.stream(file_path.as_ref()).expect("stream failed");
        // Collect into a `Result` so an item-level error fails the test
        // instead of being silently dropped.
        let messages = iter
            .collect::<Result<Vec<_>, _>>()
            .expect("stream item failed");
        assert_eq!(messages.len(), 1);
        assert_eq!(messages[0].sender, "Bob");
    }
}