use chrono::{DateTime, NaiveDateTime, Utc};
use regex::Regex;
/// Line layouts recognized at the start of an exported chat message.
///
/// Each variant pairs a regular expression (see [`DateFormat::pattern`]) with
/// the list of `strftime`-style formats used to parse the captured date and
/// time (see [`DateFormat::date_parse_formats`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateFormat {
    /// Month-first, slash-separated date inside square brackets; the time may
    /// carry seconds and an AM/PM marker, e.g. `[1/15/24, 10:30:45 AM] Alice: Hello`.
    US,
    /// Day-first, dot-separated date inside square brackets,
    /// e.g. `[15.01.24, 10:30:45] Alice: Hello`.
    EuDotBracketed,
    /// Day-first, dot-separated date followed by ` - `,
    /// e.g. `26.10.2025, 20:40 - Alice: Hello`.
    EuDotNoBracket,
    /// Day-first, slash-separated date followed by ` - `,
    /// e.g. `15/01/2024, 10:30 - Alice: Hello`.
    EuSlash,
    /// Day-first, slash-separated date inside square brackets,
    /// e.g. `[15/01/24, 10:30:45] Alice: Hello`.
    EuSlashBracketed,
}

impl DateFormat {
    /// Month-first formats: 12-hour clock variants first, then 24-hour ones;
    /// within each group the two-digit year is tried before the four-digit year.
    const US_PARSE_FORMATS: &'static [&'static str] = &[
        "%m/%d/%y, %I:%M:%S %p",
        "%m/%d/%y, %I:%M %p",
        "%m/%d/%Y, %I:%M:%S %p",
        "%m/%d/%Y, %I:%M %p",
        "%m/%d/%y, %H:%M:%S",
        "%m/%d/%y, %H:%M",
        "%m/%d/%Y, %H:%M:%S",
        "%m/%d/%Y, %H:%M",
    ];

    /// Day-first formats with `.` as the date separator.
    const EU_DOT_PARSE_FORMATS: &'static [&'static str] = &[
        "%d.%m.%y, %H:%M:%S",
        "%d.%m.%y, %H:%M",
        "%d.%m.%Y, %H:%M:%S",
        "%d.%m.%Y, %H:%M",
    ];

    /// Day-first formats with `/` as the date separator.
    const EU_SLASH_PARSE_FORMATS: &'static [&'static str] = &[
        "%d/%m/%y, %H:%M:%S",
        "%d/%m/%y, %H:%M",
        "%d/%m/%Y, %H:%M:%S",
        "%d/%m/%Y, %H:%M",
    ];

    /// Every variant, in declaration order.
    const VARIANTS: &'static [DateFormat] = &[
        DateFormat::US,
        DateFormat::EuDotBracketed,
        DateFormat::EuDotNoBracket,
        DateFormat::EuSlash,
        DateFormat::EuSlashBracketed,
    ];

    /// Returns the regular-expression source that recognizes a message line in
    /// this layout.
    ///
    /// Every pattern is anchored at the start of the line and exposes four
    /// capture groups: 1 = date, 2 = time, 3 = sender (text up to the first
    /// `:`), 4 = the remainder of the line.
    pub fn pattern(self) -> &'static str {
        match self {
            Self::US => r"^\[(\d{1,2}/\d{1,2}/\d{2,4}),\s(\d{1,2}:\d{2}(?::\d{2})?(?:\s?[APap][Mm])?)\]\s([^:]+):\s?(.*)",
            Self::EuDotBracketed => r"^\[(\d{2}\.\d{2}\.\d{2,4}),\s(\d{2}:\d{2}(?::\d{2})?)\]\s([^:]+):\s?(.*)",
            Self::EuDotNoBracket => r"^(\d{2}\.\d{2}\.\d{2,4}),\s(\d{2}:\d{2}(?::\d{2})?)\s-\s([^:]+):\s?(.*)",
            Self::EuSlash => r"^(\d{2}/\d{2}/\d{2,4}),\s(\d{2}:\d{2}(?::\d{2})?)\s-\s([^:]+):\s?(.*)",
            Self::EuSlashBracketed => r"^\[(\d{2}/\d{2}/\d{2,4}),\s(\d{2}:\d{2}(?::\d{2})?)\]\s([^:]+):\s?(.*)",
        }
    }

    /// Returns the `strftime`-style formats to try, in order, against a
    /// `"<date>, <time>"` string for this layout.
    ///
    /// The bracketed and non-bracketed variants of the same separator share
    /// one list, because brackets are not part of the captured date/time.
    pub fn date_parse_formats(self) -> &'static [&'static str] {
        match self {
            Self::US => Self::US_PARSE_FORMATS,
            Self::EuDotBracketed | Self::EuDotNoBracket => Self::EU_DOT_PARSE_FORMATS,
            Self::EuSlash | Self::EuSlashBracketed => Self::EU_SLASH_PARSE_FORMATS,
        }
    }

    /// Returns all variants in declaration order.
    pub fn all() -> &'static [DateFormat] {
        Self::VARIANTS
    }
}
/// Parses a captured date and time pair into a UTC-labelled timestamp.
///
/// `date_str` and `time_str` are joined as `"<date>, <time>"` and tried
/// against each entry of `format.date_parse_formats()` in order; the first
/// format that parses wins.
///
/// The parsed wall-clock value is tagged as UTC as-is; no timezone conversion
/// is applied.
///
/// Returns `None` when none of the formats accept the input.
pub fn parse_whatsapp_timestamp(
    date_str: &str,
    time_str: &str,
    format: DateFormat,
) -> Option<DateTime<Utc>> {
    let combined = format!("{date_str}, {time_str}");
    format
        .date_parse_formats()
        .iter()
        .find_map(|candidate| NaiveDateTime::parse_from_str(&combined, candidate).ok())
        .map(|naive| naive.and_utc())
}
/// Heuristically decides whether a parsed line is a service notice rather
/// than a message written by a participant.
///
/// A line is treated as a system message when any of the following holds:
///
/// * `content` contains one of the English marker phrases (compared
///   case-insensitively),
/// * `content` contains one of the Russian marker phrases (compared
///   case-sensitively, exactly as written below),
/// * `sender` is empty or whitespace-only, or contains `whatsapp` or `system`
///   (case-insensitively).
///
/// Matching is by plain substring, so ordinary messages that happen to
/// contain a marker such as "left" or "added" are classified as system
/// messages too.
pub fn is_whatsapp_system_message(sender: &str, content: &str) -> bool {
    const ENGLISH_MARKERS: &[&str] = &[
        "Messages and calls are end-to-end encrypted",
        "created group",
        "added",
        "removed",
        "left",
        "changed the subject",
        "changed this group's icon",
        "changed the group description",
        "deleted this group's icon",
        "changed their phone number",
        "joined using this group's invite link",
        "security code changed",
        "You're now an admin",
        "is now an admin",
        "disappeared",
        "turned on disappearing messages",
        "turned off disappearing messages",
    ];
    const RUSSIAN_MARKERS: &[&str] = &[
        "Сообщения и звонки защищены сквозным шифрованием",
        "создал(а) группу",
        "добавил",
        "удалил",
        "вышел",
        "покинул",
        "изменил тему",
        "изменил иконку группы",
        "изменил описание группы",
        "удалил иконку группы",
        "изменил номер телефона",
        "присоединился по ссылке",
        "код безопасности изменён",
        "теперь администратор",
        "включил исчезающие сообщения",
        "выключил исчезающие сообщения",
        "Подробнее",
    ];

    // English markers are compared on lowercased text on both sides.
    let lowered_content = content.to_lowercase();
    let has_english_marker = ENGLISH_MARKERS
        .iter()
        .any(|marker| lowered_content.contains(marker.to_lowercase().as_str()));
    if has_english_marker {
        return true;
    }

    // Russian markers are compared against the content exactly as given.
    if RUSSIAN_MARKERS.iter().any(|marker| content.contains(*marker)) {
        return true;
    }

    // No marker in the content: fall back to what the sender field looks like.
    if sender.trim().is_empty() {
        return true;
    }
    let lowered_sender = sender.to_lowercase();
    lowered_sender.contains("whatsapp") || lowered_sender.contains("system")
}
/// A [`DateFormat`] bundled with the compiled form of its line pattern.
struct FormatDetector {
    /// The layout this detector recognizes.
    format: DateFormat,
    /// Compiled version of `format.pattern()`.
    regex: Regex,
}

impl FormatDetector {
    /// Compiles the pattern of `format`.
    ///
    /// # Panics
    ///
    /// Panics if the pattern returned by `format.pattern()` is not a valid
    /// regular expression.
    fn new(format: DateFormat) -> Self {
        let regex = Regex::new(format.pattern()).unwrap();
        FormatDetector { format, regex }
    }

    /// Reports whether `line` matches this detector's pattern.
    fn matches(&self, line: &str) -> bool {
        self.regex.is_match(line)
    }
}
pub fn detect_whatsapp_format(lines: &[&str]) -> Option<DateFormat> {
let detectors: Vec<FormatDetector> = DateFormat::all()
.iter()
.map(|&f| FormatDetector::new(f))
.collect();
let mut scores = vec![0usize; detectors.len()];
for line in lines {
for (i, detector) in detectors.iter().enumerate() {
if detector.matches(line) {
scores[i] += 1;
}
}
}
let max_score = *scores.iter().max()?;
if max_score == 0 {
return None;
}
let winner_idx = scores.iter().position(|&s| s == max_score)?;
Some(detectors[winner_idx].format)
}
/// Convenience wrapper around [`detect_whatsapp_format`] for callers that
/// hold owned `String` lines.
///
/// Borrows each line as `&str` and forwards the resulting slice; the result
/// is whatever [`detect_whatsapp_format`] returns for those lines.
pub fn detect_whatsapp_format_owned(lines: &[String]) -> Option<DateFormat> {
    let borrowed = lines
        .iter()
        .map(|line| line.as_str())
        .collect::<Vec<&str>>();
    detect_whatsapp_format(borrowed.as_slice())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bracketed month-first lines with an AM/PM marker are detected as `US`.
    #[test]
    fn test_detect_format_us() {
        let sample = [
            "[1/15/24, 10:30:45 AM] Alice: Hello",
            "[1/15/24, 10:31:00 AM] Bob: Hi there",
        ];
        let detected = detect_whatsapp_format(&sample);
        assert_eq!(detected, Some(DateFormat::US));
    }

    /// Bracketed dot-separated dates are detected as `EuDotBracketed`.
    #[test]
    fn test_detect_format_eu_dot_bracketed() {
        let sample = [
            "[15.01.24, 10:30:45] Alice: Hello",
            "[15.01.24, 10:31:00] Bob: Hi there",
        ];
        let detected = detect_whatsapp_format(&sample);
        assert_eq!(detected, Some(DateFormat::EuDotBracketed));
    }

    /// Dot-separated dates followed by ` - ` are detected as `EuDotNoBracket`.
    #[test]
    fn test_detect_format_eu_dot_no_bracket() {
        let sample = [
            "26.10.2025, 20:40 - Alice: Hello",
            "26.10.2025, 20:41 - Bob: Hi there",
        ];
        let detected = detect_whatsapp_format(&sample);
        assert_eq!(detected, Some(DateFormat::EuDotNoBracket));
    }

    /// Slash-separated dates followed by ` - ` are detected as `EuSlash`.
    #[test]
    fn test_detect_format_eu_slash() {
        let sample = [
            "15/01/2024, 10:30 - Alice: Hello",
            "15/01/2024, 10:31 - Bob: Hi there",
        ];
        let detected = detect_whatsapp_format(&sample);
        assert_eq!(detected, Some(DateFormat::EuSlash));
    }

    /// English marker phrases flag a line as a system message; ordinary chat
    /// text and media placeholders do not.
    #[test]
    fn test_is_system_message_english() {
        let system = [
            ("Alice", "Messages and calls are end-to-end encrypted"),
            ("Bob", "added Charlie to the group"),
            ("Alice", "left"),
        ];
        for (sender, content) in system {
            assert!(is_whatsapp_system_message(sender, content));
        }

        let regular = [("Alice", "Hello everyone!"), ("Bob", "<Media omitted>")];
        for (sender, content) in regular {
            assert!(!is_whatsapp_system_message(sender, content));
        }
    }

    /// Russian marker phrases flag a line as a system message; an ordinary
    /// Russian greeting does not.
    #[test]
    fn test_is_system_message_russian() {
        let system = [
            ("Система", "Сообщения и звонки защищены сквозным шифрованием"),
            ("Bob", "Подробнее"),
        ];
        for (sender, content) in system {
            assert!(is_whatsapp_system_message(sender, content));
        }

        assert!(!is_whatsapp_system_message("Муха", "Добрый вечер"));
    }

    /// A month-first date with a 12-hour time and seconds parses.
    #[test]
    fn test_parse_timestamp_us() {
        let parsed = parse_whatsapp_timestamp("1/15/24", "10:30:45 AM", DateFormat::US);
        assert!(parsed.is_some());
    }

    /// Dot-separated dates parse with two- and four-digit years, with and
    /// without seconds.
    #[test]
    fn test_parse_timestamp_eu_dot() {
        let cases = [
            ("15.01.24", "10:30:45", DateFormat::EuDotBracketed),
            ("26.10.2025", "20:40", DateFormat::EuDotNoBracket),
        ];
        for (date, time, format) in cases {
            assert!(parse_whatsapp_timestamp(date, time, format).is_some());
        }
    }

    /// An empty or whitespace-only sender marks the line as a system message.
    #[test]
    fn test_empty_sender_is_system() {
        for sender in ["", "   "] {
            assert!(is_whatsapp_system_message(sender, "Some message"));
        }
    }
}