use proptest::prelude::*;
use serde_json::{Value, json};
use chatpack::core::output::{to_csv, to_json, to_jsonl};
use chatpack::core::{FilterConfig, Message, OutputConfig, apply_filters, merge_consecutive};
use chatpack::parsing::discord::{
DiscordAttachment, DiscordAuthor, DiscordRawMessage, DiscordReference, DiscordSticker,
parse_discord_message,
};
use chatpack::parsing::instagram::{
InstagramRawMessage, InstagramShare, fix_mojibake_encoding, parse_instagram_message,
parse_instagram_message_owned,
};
use chatpack::parsing::telegram::{
TelegramRawMessage, extract_telegram_text, parse_telegram_message, parse_unix_timestamp,
};
use chatpack::parsing::whatsapp::{
DateFormat, detect_whatsapp_format, is_whatsapp_system_message, parse_whatsapp_timestamp,
};
fn arb_message() -> impl Strategy<Value = Message> {
(
prop::sample::select(vec![
"Alice".to_string(),
"Bob".to_string(),
"Charlie".to_string(),
"User123".to_string(),
"ะะฒะฐะฝ".to_string(),
"Test".to_string(),
"ๆฅๆฌ่ช".to_string(),
"ู
ุณุชุฎุฏู
".to_string(),
]),
prop::sample::select(vec![
"Hello".to_string(),
"Hi there!".to_string(),
"How are you?".to_string(),
"Good morning".to_string(),
"Test message 123".to_string(),
"ะัะธะฒะตั ะผะธั".to_string(),
String::new(),
" ".to_string(),
"Special;chars\"here\nnewline".to_string(),
"๐๐ฅ๐ emoji".to_string(),
"ๆฅๆฌ่ชใในใ".to_string(),
"ู
ุฑุญุจุง ุจุงูุนุงูู
".to_string(),
"Mixed ะขะตัั ๆฅๆฌ ๐".to_string(),
"\t\n\r special whitespace".to_string(),
"a]b[c{d}e".to_string(),
]),
)
.prop_map(|(sender, content)| Message {
sender,
content,
timestamp: None,
id: None,
reply_to: None,
edited: None,
})
}
fn arb_message_with_timestamp() -> impl Strategy<Value = Message> {
(
prop::sample::select(vec!["Alice", "Bob", "Charlie", "User"]),
prop::sample::select(vec!["Hello", "Test", "Message", ""]),
prop::option::of(1700000000i64..1800000000i64),
)
.prop_map(|(sender, content, ts_opt)| {
let mut msg = Message::new(sender, content);
if let Some(ts) = ts_opt {
msg.timestamp = chrono::DateTime::from_timestamp(ts, 0);
}
msg
})
}
fn arb_messages(max_len: usize) -> impl Strategy<Value = Vec<Message>> {
prop::collection::vec(arb_message(), 0..max_len)
}
fn arb_messages_with_ts(max_len: usize) -> impl Strategy<Value = Vec<Message>> {
prop::collection::vec(arb_message_with_timestamp(), 0..max_len)
}
fn arb_telegram_text_value() -> impl Strategy<Value = Value> {
prop_oneof![
prop::sample::select(vec![
json!("Hello"),
json!("Test message"),
json!("ะัะธะฒะตั ะผะธั"),
json!("๐ emoji"),
json!(""),
json!(" "),
]),
prop::sample::select(vec![
json!(["Hello", " ", "World"]),
json!(["Part1", "Part2", "Part3"]),
json!([]),
]),
prop::sample::select(vec![
json!(["Check: ", {"type": "link", "text": "https://example.com"}]),
json!([{"type": "mention", "text": "@user"}, " said hello"]),
json!(["Mixed ", {"type": "bold", "text": "bold"}, " text"]),
]),
prop::sample::select(vec![
json!(["A", {"type": "link", "text": "B"}, "C", {"type": "mention", "text": "D"}]),
json!([{"type": "unknown"}, "text"]), ]),
prop::sample::select(vec![
json!(null),
json!(123),
json!(true),
json!({"text": "object at root"}),
]),
]
}
fn arb_unix_timestamp_str() -> impl Strategy<Value = String> {
prop_oneof![
(1000000000i64..2000000000i64).prop_map(|ts| ts.to_string()),
Just("0".to_string()),
Just("1".to_string()),
Just("-1".to_string()),
Just("9999999999".to_string()),
Just(String::new()),
Just("not_a_number".to_string()),
Just("12.34".to_string()),
Just("123abc".to_string()),
]
}
fn arb_whatsapp_line() -> impl Strategy<Value = String> {
prop_oneof![
Just("[1/15/24, 10:30:45 AM] Alice: Hello".to_string()),
Just("[12/31/23, 11:59:59 PM] Bob: Bye".to_string()),
Just("[15.01.24, 10:30:45] Alice: Hello".to_string()),
Just("[31.12.23, 23:59:59] Bob: Bye".to_string()),
Just("15.01.2024, 10:30 - Alice: Hello".to_string()),
Just("26.10.2025, 20:40 - Bob: Test".to_string()),
Just("15/01/2024, 10:30 - Alice: Hello".to_string()),
Just("31/12/2023, 23:59 - Bob: Bye".to_string()),
Just("[15/01/2024, 10:30:45] Alice: Hello".to_string()),
Just("This is a continuation".to_string()),
Just("No date format here".to_string()),
Just(String::new()),
]
}
fn arb_sender() -> impl Strategy<Value = String> {
prop::sample::select(vec![
"Alice".to_string(),
"Bob".to_string(),
"WhatsApp".to_string(),
"System".to_string(),
String::new(),
" ".to_string(),
"ะะฒะฐะฝ".to_string(),
"user123".to_string(),
])
}
fn arb_content_for_system_check() -> impl Strategy<Value = String> {
prop::sample::select(vec![
"created group \"Test\"".to_string(),
"added Charlie to the group".to_string(),
"removed Dave from the group".to_string(),
"changed the subject to \"New Name\"".to_string(),
"security code changed".to_string(),
"You're now an admin".to_string(),
"ะกะพะพะฑัะตะฝะธั ะธ ะทะฒะพะฝะบะธ ะทะฐัะธัะตะฝั ัะบะฒะพะทะฝัะผ ัะธััะพะฒะฐะฝะธะตะผ".to_string(),
"ัะพะทะดะฐะป(ะฐ) ะณััะฟะฟั".to_string(),
])
}
fn arb_mojibake_input() -> impl Strategy<Value = String> {
prop::sample::select(vec![
"Hello World".to_string(),
"Test 123".to_string(),
String::new(),
"ะัะธะฒะตั".to_string(),
"ๆฅๆฌ่ช".to_string(),
"๐๐ฅ".to_string(),
"ู
ุฑุญุจุง".to_string(),
"Hello ะัะธะฒะตั ๆฅๆฌ".to_string(),
])
}
fn arb_discord_raw_message() -> impl Strategy<Value = DiscordRawMessage> {
(
prop::sample::select(vec!["123", "456789", "999999999999999999"]),
prop::sample::select(vec![
"2024-01-15T10:30:00+00:00",
"2024-12-31T23:59:59+00:00",
"2020-01-01T00:00:00Z",
]),
prop::option::of(prop::sample::select(vec![
"2024-01-15T11:00:00+00:00",
"2024-12-31T23:59:59+00:00",
])),
prop::sample::select(vec![
"Hello".to_string(),
"Test message".to_string(),
String::new(),
" ".to_string(),
"ะัะธะฒะตั ๐".to_string(),
]),
prop::sample::select(vec!["alice", "bob123", "ะขะตัั"]),
prop::option::of(prop::sample::select(vec!["Alice", "Bob Display", "ะะธะบ"])),
prop::option::of(prop::sample::select(vec!["111", "222"])),
prop::bool::ANY,
prop::bool::ANY,
)
.prop_map(
|(id, ts, ts_edited, content, name, nickname, ref_id, has_attach, has_sticker)| {
DiscordRawMessage {
id: id.to_string(),
timestamp: ts.to_string(),
timestamp_edited: ts_edited.map(|s| s.to_string()),
content,
author: DiscordAuthor {
name: name.to_string(),
nickname: nickname.map(|s| s.to_string()),
},
reference: ref_id.map(|id| DiscordReference {
message_id: Some(id.to_string()),
}),
attachments: if has_attach {
Some(vec![DiscordAttachment {
file_name: "test.png".to_string(),
}])
} else {
None
},
stickers: if has_sticker {
Some(vec![DiscordSticker {
name: "TestSticker".to_string(),
}])
} else {
None
},
}
},
)
}
fn arb_instagram_raw_message() -> impl Strategy<Value = InstagramRawMessage> {
(
prop::sample::select(vec!["user_one", "user_two", "ะขะตัั", "ๆฅๆฌ"]),
1700000000000i64..1800000000000i64,
prop::option::of(prop::sample::select(vec![
"Hello!".to_string(),
"ะัะธะฒะตั".to_string(),
String::new(),
" ".to_string(),
"๐ emoji".to_string(),
])),
prop::bool::ANY,
)
.prop_map(|(sender, ts, content, has_share)| InstagramRawMessage {
sender_name: sender.to_string(),
timestamp_ms: ts,
content,
share: if has_share {
Some(InstagramShare {
share_text: Some("Shared content".to_string()),
link: Some("https://example.com".to_string()),
})
} else {
None
},
photos: None,
videos: None,
audio_files: None,
})
}
fn arb_date_string() -> impl Strategy<Value = String> {
prop_oneof![
Just("2024-01-15".to_string()),
Just("2024-12-31".to_string()),
Just("2000-01-01".to_string()),
Just("2099-12-31".to_string()),
Just("2024-02-29".to_string()),
Just("01-15-2024".to_string()),
Just("15/01/2024".to_string()),
Just("2024/01/15".to_string()),
Just("not-a-date".to_string()),
Just(String::new()),
Just("2023-02-29".to_string()), Just("2024-13-01".to_string()), Just("2024-01-32".to_string()), ]
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(200))]
#[test]
fn merge_never_increases_count(messages in arb_messages(30)) {
let original_len = messages.len();
let merged = merge_consecutive(messages);
prop_assert!(merged.len() <= original_len);
}
#[test]
fn merge_empty_is_empty(_dummy in Just(())) {
let result = merge_consecutive(vec![]);
prop_assert!(result.is_empty());
}
#[test]
fn merge_single_stays_single(msg in arb_message()) {
let result = merge_consecutive(vec![msg]);
prop_assert_eq!(result.len(), 1);
}
#[test]
fn merge_same_sender_produces_one(n in 1usize..10) {
let messages: Vec<Message> = (0..n)
.map(|i| Message {
sender: "Alice".to_string(),
content: format!("Message {}", i),
timestamp: None,
id: None,
reply_to: None,
edited: None,
})
.collect();
let merged = merge_consecutive(messages);
prop_assert_eq!(merged.len(), 1);
}
#[test]
fn merge_preserves_content_parts(n in 1usize..5) {
let messages: Vec<Message> = (0..n)
.map(|i| Message {
sender: "Alice".to_string(),
content: format!("part{}", i),
timestamp: None,
id: None,
reply_to: None,
edited: None,
})
.collect();
let merged = merge_consecutive(messages);
for i in 0..n {
let expected = format!("part{}", i);
prop_assert!(merged[0].content.contains(&expected), "Missing part{}", i);
}
}
#[test]
fn merge_is_idempotent(messages in arb_messages(20)) {
let first_merge = merge_consecutive(messages.clone());
let second_merge = merge_consecutive(first_merge.clone());
prop_assert_eq!(first_merge.len(), second_merge.len());
for (m1, m2) in first_merge.iter().zip(second_merge.iter()) {
prop_assert_eq!(&m1.sender, &m2.sender);
prop_assert_eq!(&m1.content, &m2.content);
}
}
#[test]
fn merge_alternating_preserves_count(n in 1usize..10) {
let messages: Vec<Message> = (0..n)
.map(|i| Message {
sender: if i % 2 == 0 { "Alice" } else { "Bob" }.to_string(),
content: format!("Msg {}", i),
timestamp: None,
id: None,
reply_to: None,
edited: None,
})
.collect();
let merged = merge_consecutive(messages.clone());
prop_assert_eq!(merged.len(), messages.len());
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(200))]
#[test]
fn filter_never_increases_count(messages in arb_messages(30)) {
let original_len = messages.len();
let config = FilterConfig::new();
let filtered = apply_filters(messages, &config);
prop_assert!(filtered.len() <= original_len);
}
#[test]
fn no_filter_is_passthrough(messages in arb_messages(30)) {
let original_len = messages.len();
let config = FilterConfig::new();
let filtered = apply_filters(messages, &config);
prop_assert_eq!(filtered.len(), original_len);
}
#[test]
fn user_filter_only_keeps_matching(messages in arb_messages(30)) {
let config = FilterConfig::new().with_user("Alice".to_string());
let filtered = apply_filters(messages, &config);
for msg in &filtered {
prop_assert!(
msg.sender.eq_ignore_ascii_case("Alice"),
"Found non-matching sender: {}", msg.sender
);
}
}
#[test]
fn filter_is_subset(messages in arb_messages_with_ts(20)) {
let config = FilterConfig::new().with_user("Alice".to_string());
let original_senders: Vec<_> = messages.iter().map(|m| m.sender.clone()).collect();
let filtered = apply_filters(messages, &config);
for msg in &filtered {
prop_assert!(original_senders.contains(&msg.sender));
}
}
#[test]
fn filter_nonexistent_user_empty(messages in arb_messages(20)) {
let config = FilterConfig::new().with_user("NonExistentUser12345".to_string());
let filtered = apply_filters(messages, &config);
prop_assert!(filtered.is_empty());
}
#[test]
fn date_filter_excludes_no_timestamp(messages in arb_messages(20)) {
if let Ok(config) = FilterConfig::new().after_date("2024-01-01") {
let filtered = apply_filters(messages, &config);
for msg in &filtered {
prop_assert!(msg.timestamp.is_some() || filtered.is_empty());
}
}
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(300))]
#[test]
fn merge_never_panics(messages in arb_messages(50)) {
let _ = merge_consecutive(messages);
}
#[test]
fn filter_never_panics(messages in arb_messages(50)) {
let config = FilterConfig::new().with_user("Test".to_string());
let _ = apply_filters(messages, &config);
}
#[test]
fn special_chars_dont_break_merge(sender in prop::sample::select(vec!["A", "B", "C"])) {
let msg = Message {
sender: sender.to_string(),
content: "test;with\"special\nchars\ttab\r\n\0null".to_string(),
timestamp: None,
id: None,
reply_to: None,
edited: None,
};
let _ = merge_consecutive(vec![msg.clone(), msg]);
}
#[test]
fn unicode_content_preserved(idx in 0usize..7) {
let contents = [
"ะัะธะฒะตั",
"ใใใซใกใฏ",
"ู
ุฑุญุจุง",
"๐๐ฅ๐",
"Mixed ะขะตัั ๆฅๆฌ",
"๐จโ๐ฉโ๐งโ๐ฆ", "e\u{0301}", ];
let content = contents[idx].to_string();
let msg = Message {
sender: "User".to_string(),
content: content.clone(),
timestamp: None,
id: None,
reply_to: None,
edited: None,
};
let merged = merge_consecutive(vec![msg]);
prop_assert_eq!(&merged[0].content, &content);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(200))]
#[test]
fn message_serde_roundtrip(msg in arb_message()) {
let json = serde_json::to_string(&msg).expect("serialize");
let parsed: Message = serde_json::from_str(&json).expect("deserialize");
prop_assert_eq!(msg.sender, parsed.sender);
prop_assert_eq!(msg.content, parsed.content);
}
#[test]
fn message_full_serde_roundtrip(
sender in prop::sample::select(vec!["Alice", "Bob"]),
content in prop::sample::select(vec!["Hello", "Test"]),
ts in 1700000000i64..1800000000i64,
id in 1u64..1000u64,
reply in prop::option::of(1u64..1000u64)
) {
let msg = Message {
sender: sender.to_string(),
content: content.to_string(),
timestamp: chrono::DateTime::from_timestamp(ts, 0),
id: Some(id),
reply_to: reply,
edited: None,
};
let json = serde_json::to_string(&msg).expect("serialize");
let parsed: Message = serde_json::from_str(&json).expect("deserialize");
prop_assert_eq!(msg.sender, parsed.sender);
prop_assert_eq!(msg.content, parsed.content);
prop_assert_eq!(msg.id, parsed.id);
prop_assert_eq!(msg.reply_to, parsed.reply_to);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(200))]
#[test]
fn telegram_extract_never_panics(value in arb_telegram_text_value()) {
let _ = extract_telegram_text(&value);
}
#[test]
fn telegram_extract_valid_utf8(value in arb_telegram_text_value()) {
let result = extract_telegram_text(&value);
prop_assert!(result.len() <= result.len()); }
#[test]
fn telegram_extract_string_identity(s in "[a-zA-Z0-9 ]{0,50}") {
let value = json!(s);
let result = extract_telegram_text(&value);
prop_assert_eq!(result, s);
}
#[test]
fn telegram_extract_array_concat(parts in prop::collection::vec("[a-zA-Z]{1,10}", 1..5)) {
let value = Value::Array(parts.iter().map(|s| json!(s)).collect());
let result = extract_telegram_text(&value);
let expected: String = parts.concat();
prop_assert_eq!(result, expected);
}
#[test]
fn telegram_parse_ts_never_panics(ts_str in arb_unix_timestamp_str()) {
let _ = parse_unix_timestamp(&ts_str);
}
#[test]
fn telegram_valid_ts_parses(ts in 1000000000i64..2000000000i64) {
let ts_str = ts.to_string();
let result = parse_unix_timestamp(&ts_str);
prop_assert!(result.is_some());
prop_assert_eq!(result.unwrap().timestamp(), ts);
}
#[test]
fn telegram_parse_message_never_panics(
msg_type in prop::sample::select(vec!["message", "service", "other"]),
sender in prop::option::of(prop::sample::select(vec!["Alice", "Bob"])),
text_value in arb_telegram_text_value(),
ts in prop::option::of(1700000000i64..1800000000i64)
) {
let msg = TelegramRawMessage {
id: Some(123),
msg_type: msg_type.to_string(),
date_unixtime: ts.map(|t| t.to_string()),
from: sender.map(|s| s.to_string()),
text: Some(text_value),
reply_to_message_id: None,
edited_unixtime: None,
};
let _ = parse_telegram_message(&msg);
}
#[test]
fn telegram_skip_service_messages(
msg_type in prop::sample::select(vec!["service", "action", "unknown"])
) {
let msg = TelegramRawMessage {
id: Some(123),
msg_type: msg_type.to_string(),
date_unixtime: Some("1700000000".to_string()),
from: Some("Alice".to_string()),
text: Some(json!("Hello")),
reply_to_message_id: None,
edited_unixtime: None,
};
let result = parse_telegram_message(&msg);
prop_assert!(result.is_none());
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(200))]
#[test]
fn whatsapp_detect_never_panics(lines in prop::collection::vec(arb_whatsapp_line(), 0..20)) {
let refs: Vec<&str> = lines.iter().map(String::as_str).collect();
let _ = detect_whatsapp_format(&refs);
}
#[test]
fn whatsapp_detect_empty_is_none(_dummy in Just(())) {
let result = detect_whatsapp_format(&[]);
prop_assert!(result.is_none());
}
#[test]
fn whatsapp_detect_deterministic(lines in prop::collection::vec(arb_whatsapp_line(), 1..10)) {
let refs: Vec<&str> = lines.iter().map(String::as_str).collect();
let result1 = detect_whatsapp_format(&refs);
let result2 = detect_whatsapp_format(&refs);
prop_assert_eq!(result1, result2);
}
#[test]
fn whatsapp_detect_us_format(_dummy in Just(())) {
let lines = vec![
"[1/15/24, 10:30:45 AM] Alice: Hello",
"[12/31/23, 11:59:59 PM] Bob: Bye",
];
let result = detect_whatsapp_format(&lines);
prop_assert_eq!(result, Some(DateFormat::US));
}
#[test]
fn whatsapp_system_check_never_panics(
sender in arb_sender(),
content in arb_content_for_system_check()
) {
let _ = is_whatsapp_system_message(&sender, &content);
}
#[test]
fn whatsapp_empty_sender_is_system(content in arb_content_for_system_check()) {
prop_assert!(is_whatsapp_system_message("", &content));
prop_assert!(is_whatsapp_system_message(" ", &content));
}
#[test]
fn whatsapp_system_sender_is_system(content in prop::sample::select(vec!["Hello", "Test"])) {
prop_assert!(is_whatsapp_system_message("WhatsApp", content));
prop_assert!(is_whatsapp_system_message("whatsapp", content));
prop_assert!(is_whatsapp_system_message("System", content));
prop_assert!(is_whatsapp_system_message("SYSTEM", content));
}
#[test]
fn whatsapp_parse_ts_never_panics(
date in prop::sample::select(vec!["1/15/24", "15.01.24", "15/01/2024", "invalid"]),
time in prop::sample::select(vec!["10:30:45 AM", "23:59:59", "10:30", "invalid"]),
format in prop::sample::select(vec![
DateFormat::US,
DateFormat::EuDotBracketed,
DateFormat::EuSlash,
])
) {
let _ = parse_whatsapp_timestamp(date, time, format);
}
#[test]
fn whatsapp_valid_us_ts_parses(
month in 1u32..=12,
day in 1u32..=28,
hour in 1u32..=12
) {
let date = format!("{}/{}/24", month, day);
let time = format!("{}:30:00 AM", hour);
let result = parse_whatsapp_timestamp(&date, &time, DateFormat::US);
prop_assert!(result.is_some());
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(200))]
#[test]
fn instagram_mojibake_never_panics(input in arb_mojibake_input()) {
let _ = fix_mojibake_encoding(&input);
}
#[test]
fn instagram_ascii_passthrough(s in "[a-zA-Z0-9 !?.,]{0,100}") {
let result = fix_mojibake_encoding(&s);
prop_assert_eq!(result, s);
}
#[test]
fn instagram_empty_passthrough(_dummy in Just(())) {
let result = fix_mojibake_encoding("");
prop_assert_eq!(result, "");
}
#[test]
fn instagram_output_valid_utf8(input in arb_mojibake_input()) {
let result = fix_mojibake_encoding(&input);
let _ = result.len();
}
#[test]
fn instagram_parse_never_panics(msg in arb_instagram_raw_message()) {
let _ = parse_instagram_message(&msg, false);
let _ = parse_instagram_message(&msg, true);
}
#[test]
fn instagram_parse_owned_never_panics(msg in arb_instagram_raw_message()) {
let _ = parse_instagram_message_owned(msg, false);
}
#[test]
fn instagram_no_content_is_none(sender in prop::sample::select(vec!["user", "test"])) {
let msg = InstagramRawMessage {
sender_name: sender.to_string(),
timestamp_ms: 1700000000000,
content: None,
share: None,
photos: None,
videos: None,
audio_files: None,
};
let result = parse_instagram_message(&msg, false);
prop_assert!(result.is_none());
}
#[test]
fn instagram_share_fallback(sender in prop::sample::select(vec!["user", "test"])) {
let msg = InstagramRawMessage {
sender_name: sender.to_string(),
timestamp_ms: 1700000000000,
content: None,
share: Some(InstagramShare {
share_text: Some("Shared!".to_string()),
link: None,
}),
photos: None,
videos: None,
audio_files: None,
};
let result = parse_instagram_message(&msg, false);
prop_assert!(result.is_some());
prop_assert_eq!(result.unwrap().content, "Shared!");
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(200))]
#[test]
fn discord_parse_never_panics(msg in arb_discord_raw_message()) {
let _ = parse_discord_message(&msg);
}
#[test]
fn discord_empty_is_none(
id in prop::sample::select(vec!["123", "456"]),
name in prop::sample::select(vec!["alice", "bob"])
) {
let msg = DiscordRawMessage {
id: id.to_string(),
timestamp: "2024-01-15T10:30:00+00:00".to_string(),
timestamp_edited: None,
content: String::new(),
author: DiscordAuthor {
name: name.to_string(),
nickname: None,
},
reference: None,
attachments: None,
stickers: None,
};
let result = parse_discord_message(&msg);
prop_assert!(result.is_none());
}
#[test]
fn discord_nickname_priority(
name in prop::sample::select(vec!["user123", "user456"]),
nickname in prop::sample::select(vec!["Display Name", "Nick"])
) {
let msg = DiscordRawMessage {
id: "123".to_string(),
timestamp: "2024-01-15T10:30:00+00:00".to_string(),
timestamp_edited: None,
content: "Test".to_string(),
author: DiscordAuthor {
name: name.to_string(),
nickname: Some(nickname.to_string()),
},
reference: None,
attachments: None,
stickers: None,
};
let result = parse_discord_message(&msg);
prop_assert!(result.is_some());
prop_assert_eq!(result.unwrap().sender, nickname);
}
#[test]
fn discord_attachments_appended(filename in prop::sample::select(vec!["test.png", "file.pdf"])) {
let msg = DiscordRawMessage {
id: "123".to_string(),
timestamp: "2024-01-15T10:30:00+00:00".to_string(),
timestamp_edited: None,
content: "Check this".to_string(),
author: DiscordAuthor {
name: "alice".to_string(),
nickname: None,
},
reference: None,
attachments: Some(vec![DiscordAttachment {
file_name: filename.to_string(),
}]),
stickers: None,
};
let result = parse_discord_message(&msg);
prop_assert!(result.is_some());
let content = result.unwrap().content;
let expected = format!("[Attachment: {}]", filename);
prop_assert!(content.contains(&expected), "Missing attachment marker in content");
}
#[test]
fn discord_stickers_appended(sticker_name in prop::sample::select(vec!["Cool", "Nice"])) {
let msg = DiscordRawMessage {
id: "123".to_string(),
timestamp: "2024-01-15T10:30:00+00:00".to_string(),
timestamp_edited: None,
content: "Look".to_string(),
author: DiscordAuthor {
name: "bob".to_string(),
nickname: None,
},
reference: None,
attachments: None,
stickers: Some(vec![DiscordSticker {
name: sticker_name.to_string(),
}]),
};
let result = parse_discord_message(&msg);
prop_assert!(result.is_some());
let content = result.unwrap().content;
let expected = format!("[Sticker: {}]", sticker_name);
prop_assert!(content.contains(&expected), "Missing sticker marker in content");
}
#[test]
fn discord_attachment_only_kept(filename in prop::sample::select(vec!["a.jpg", "b.mp4"])) {
let msg = DiscordRawMessage {
id: "123".to_string(),
timestamp: "2024-01-15T10:30:00+00:00".to_string(),
timestamp_edited: None,
content: String::new(),
author: DiscordAuthor {
name: "user".to_string(),
nickname: None,
},
reference: None,
attachments: Some(vec![DiscordAttachment {
file_name: filename.to_string(),
}]),
stickers: None,
};
let result = parse_discord_message(&msg);
prop_assert!(result.is_some());
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(100))]
#[test]
fn csv_output_never_panics(messages in arb_messages(20)) {
let config = OutputConfig::default();
let _ = to_csv(&messages, &config);
}
#[test]
fn csv_has_header(messages in arb_messages(10)) {
let config = OutputConfig::default();
if let Ok(csv) = to_csv(&messages, &config) {
prop_assert!(csv.contains("Sender") && csv.contains("Content"));
}
}
#[test]
fn json_output_never_panics(messages in arb_messages(20)) {
let config = OutputConfig::default();
let _ = to_json(&messages, &config);
}
#[test]
fn json_output_valid(messages in arb_messages(10)) {
let config = OutputConfig::default();
if let Ok(json_str) = to_json(&messages, &config) {
let parsed: Result<Value, _> = serde_json::from_str(&json_str);
prop_assert!(parsed.is_ok(), "Invalid JSON: {}", json_str);
}
}
#[test]
fn jsonl_output_never_panics(messages in arb_messages(20)) {
let config = OutputConfig::default();
let _ = to_jsonl(&messages, &config);
}
#[test]
fn jsonl_valid_per_line(messages in arb_messages(5)) {
let config = OutputConfig::default();
if let Ok(jsonl) = to_jsonl(&messages, &config) {
for line in jsonl.lines() {
if !line.is_empty() {
let parsed: Result<Value, _> = serde_json::from_str(line);
prop_assert!(parsed.is_ok(), "Invalid JSONL line: {}", line);
}
}
}
}
#[test]
fn csv_special_chars_safe(content in prop::sample::select(vec![
"Normal".to_string(),
"With;semicolon".to_string(),
"With\"quote".to_string(),
"With\nnewline".to_string(),
"All;\"chars\nhere".to_string(),
"Tab\there".to_string(),
])) {
let msg = Message::new("User", content);
let config = OutputConfig::default();
let result = to_csv(&[msg], &config);
prop_assert!(result.is_ok());
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(100))]
#[test]
fn filter_after_date_never_panics(date_str in arb_date_string()) {
let _ = FilterConfig::new().after_date(&date_str);
}
#[test]
fn filter_before_date_never_panics(date_str in arb_date_string()) {
let _ = FilterConfig::new().before_date(&date_str);
}
#[test]
fn filter_valid_date_parses(
year in 2000i32..2100,
month in 1u32..=12,
day in 1u32..=28
) {
let date_str = format!("{:04}-{:02}-{:02}", year, month, day);
let result = FilterConfig::new().after_date(&date_str);
prop_assert!(result.is_ok(), "Failed to parse: {}", date_str);
}
#[test]
fn filter_invalid_format_errors(
format in prop::sample::select(vec![
"01-15-2024",
"15/01/2024",
"2024/01/15",
"not-a-date",
"",
])
) {
let result = FilterConfig::new().after_date(format);
prop_assert!(result.is_err());
}
#[test]
fn filter_leap_year_handled(_dummy in Just(())) {
let valid = FilterConfig::new().after_date("2024-02-29");
prop_assert!(valid.is_ok());
let invalid = FilterConfig::new().after_date("2023-02-29");
prop_assert!(invalid.is_err());
}
}
#[cfg(test)]
mod edge_cases {
use super::*;
#[test]
fn merge_consecutive_same_sender() {
let messages = vec![
Message::new("Alice", "Hello"),
Message::new("Alice", "World"),
];
let merged = merge_consecutive(messages);
assert_eq!(merged.len(), 1);
assert!(merged[0].content.contains("Hello"));
assert!(merged[0].content.contains("World"));
}
#[test]
fn merge_alternating_senders() {
let messages = vec![
Message::new("Alice", "Hi"),
Message::new("Bob", "Hey"),
Message::new("Alice", "Bye"),
];
let merged = merge_consecutive(messages);
assert_eq!(merged.len(), 3);
}
#[test]
fn filter_empty_messages() {
let messages: Vec<Message> = vec![];
let config = FilterConfig::new().with_user("Anyone".into());
let filtered = apply_filters(messages, &config);
assert!(filtered.is_empty());
}
#[test]
fn merge_with_empty_content() {
let messages = vec![
Message::new("Alice", ""),
Message::new("Alice", "Real message"),
];
let merged = merge_consecutive(messages);
assert_eq!(merged.len(), 1);
}
#[test]
fn telegram_extract_null_value() {
let value = json!(null);
let result = extract_telegram_text(&value);
assert_eq!(result, "");
}
#[test]
fn telegram_extract_number_value() {
let value = json!(12345);
let result = extract_telegram_text(&value);
assert_eq!(result, "");
}
#[test]
fn telegram_extract_nested_array() {
let value = json!([
"Start ",
{"type": "link", "text": "http://example.com"},
" middle ",
{"type": "bold", "text": "bold text"},
" end"
]);
let result = extract_telegram_text(&value);
assert_eq!(result, "Start http://example.com middle bold text end");
}
#[test]
fn whatsapp_all_formats_detected() {
let us_lines = vec!["[1/15/24, 10:30:45 AM] Alice: Hello"];
assert_eq!(detect_whatsapp_format(&us_lines), Some(DateFormat::US));
let eu_dot = vec!["[25.01.24, 10:30:45] Alice: Hello"];
assert_eq!(
detect_whatsapp_format(&eu_dot),
Some(DateFormat::EuDotBracketed)
);
let eu_dot_no = vec!["25.01.2024, 10:30 - Alice: Hello"];
assert_eq!(
detect_whatsapp_format(&eu_dot_no),
Some(DateFormat::EuDotNoBracket)
);
let eu_slash = vec!["25/01/2024, 10:30 - Alice: Hello"];
assert_eq!(detect_whatsapp_format(&eu_slash), Some(DateFormat::EuSlash));
}
#[test]
fn instagram_mojibake_ascii_only() {
assert_eq!(
fix_mojibake_encoding("Hello World 123!"),
"Hello World 123!"
);
}
#[test]
fn discord_multiple_attachments() {
let msg = DiscordRawMessage {
id: "123".to_string(),
timestamp: "2024-01-15T10:30:00+00:00".to_string(),
timestamp_edited: None,
content: "Files:".to_string(),
author: DiscordAuthor {
name: "user".to_string(),
nickname: None,
},
reference: None,
attachments: Some(vec![
DiscordAttachment {
file_name: "a.png".to_string(),
},
DiscordAttachment {
file_name: "b.jpg".to_string(),
},
]),
stickers: None,
};
let result = parse_discord_message(&msg).unwrap();
assert!(result.content.contains("[Attachment: a.png]"));
assert!(result.content.contains("[Attachment: b.jpg]"));
}
#[test]
fn csv_output_with_all_options() {
let msg = Message {
sender: "Alice".to_string(),
content: "Hello".to_string(),
timestamp: chrono::DateTime::from_timestamp(1700000000, 0),
id: Some(123),
reply_to: Some(100),
edited: chrono::DateTime::from_timestamp(1700000100, 0),
};
let config = OutputConfig {
include_timestamps: true,
include_ids: true,
include_replies: true,
include_edited: true,
};
let csv = to_csv(&[msg], &config).unwrap();
assert!(csv.contains("ID"));
assert!(csv.contains("Timestamp"));
assert!(csv.contains("Sender"));
assert!(csv.contains("Content"));
assert!(csv.contains("ReplyTo"));
assert!(csv.contains("Edited"));
}
#[test]
fn very_long_message() {
let long_content = "x".repeat(100_000);
let msg = Message::new("User", &long_content);
let merged = merge_consecutive(vec![msg.clone()]);
assert_eq!(merged[0].content.len(), 100_000);
let json = serde_json::to_string(&msg).unwrap();
let parsed: Message = serde_json::from_str(&json).unwrap();
assert_eq!(parsed.content.len(), 100_000);
}
#[test]
fn unicode_normalization_edge_cases() {
let combining = "e\u{0301}"; let msg = Message::new("User", combining);
let merged = merge_consecutive(vec![msg]);
assert_eq!(merged[0].content, combining);
let family = "๐จโ๐ฉโ๐งโ๐ฆ";
let msg2 = Message::new("User", family);
let merged2 = merge_consecutive(vec![msg2]);
assert_eq!(merged2[0].content, family);
}
#[test]
fn filter_date_boundaries() {
use chrono::TimeZone;
let messages = vec![
Message {
sender: "Alice".to_string(),
content: "At start".to_string(),
timestamp: Some(chrono::Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap()),
id: None,
reply_to: None,
edited: None,
},
Message {
sender: "Alice".to_string(),
content: "At end".to_string(),
timestamp: Some(
chrono::Utc
.with_ymd_and_hms(2024, 1, 1, 23, 59, 59)
.unwrap(),
),
id: None,
reply_to: None,
edited: None,
},
];
let config = FilterConfig::new()
.after_date("2024-01-01")
.unwrap()
.before_date("2024-01-01")
.unwrap();
let filtered = apply_filters(messages, &config);
assert_eq!(filtered.len(), 2);
}
}