#![allow(
clippy::approx_constant,
clippy::useless_vec,
clippy::len_zero,
clippy::unnecessary_cast,
clippy::redundant_closure,
clippy::too_many_arguments,
clippy::type_complexity,
clippy::needless_borrow,
clippy::enum_variant_names,
clippy::upper_case_acronyms,
clippy::inconsistent_digit_grouping,
clippy::unit_cmp,
clippy::assertions_on_constants,
clippy::iter_on_single_items,
clippy::expect_fun_call,
clippy::redundant_pattern_matching,
variant_size_differences,
clippy::absurd_extreme_comparisons,
clippy::nonminimal_bool,
clippy::for_kv_map,
clippy::needless_range_loop,
clippy::single_match,
clippy::collapsible_if,
clippy::needless_return,
clippy::redundant_clone,
clippy::map_entry,
clippy::match_single_binding,
clippy::bool_comparison,
clippy::derivable_impls,
clippy::manual_range_contains,
clippy::needless_borrows_for_generic_args,
clippy::manual_map,
clippy::vec_init_then_push,
clippy::identity_op,
clippy::manual_flatten,
clippy::single_char_pattern,
clippy::search_is_some,
clippy::option_map_unit_fn,
clippy::while_let_on_iterator,
clippy::clone_on_copy,
clippy::box_collection,
clippy::redundant_field_names,
clippy::ptr_arg,
clippy::large_enum_variant,
clippy::match_ref_pats,
clippy::needless_pass_by_value,
clippy::unused_unit,
clippy::let_and_return,
clippy::suspicious_else_formatting,
clippy::manual_strip,
clippy::match_like_matches_macro,
clippy::from_over_into,
clippy::wrong_self_convention,
clippy::inherent_to_string,
clippy::new_without_default,
clippy::unnecessary_wraps,
clippy::field_reassign_with_default,
clippy::manual_find,
clippy::unnecessary_lazy_evaluations,
clippy::should_implement_trait,
clippy::missing_safety_doc,
clippy::unusual_byte_groupings,
clippy::bool_assert_comparison,
clippy::zero_prefixed_literal,
clippy::await_holding_lock,
clippy::manual_saturating_arithmetic,
clippy::explicit_counter_loop,
clippy::needless_lifetimes,
clippy::single_component_path_imports,
clippy::uninlined_format_args,
clippy::iter_cloned_collect,
clippy::manual_str_repeat,
clippy::excessive_precision,
clippy::precedence,
clippy::unnecessary_literal_unwrap
)]
use oxicode::{
config, decode_from_slice, decode_from_slice_with_config, encode_to_vec,
encode_to_vec_with_config, Decode, Encode,
};
/// Script category stored in the `script` field of a [`TextSample`].
///
/// Derives `Encode`/`Decode` so values can be passed through the oxicode
/// encode/decode helpers imported by this file, and `Debug`/`PartialEq` so
/// the tests can compare values with `assert_eq!`.
///
/// NOTE(review): variant order may be part of the encoded representation —
/// confirm against the oxicode derive documentation before reordering.
#[derive(Debug, PartialEq, Encode, Decode)]
enum Script {
Latin,
CyrillicScript,
Arabic,
Hebrew,
Chinese,
Japanese,
Korean,
Emoji,
// Declared for completeness; no test in this file constructs it.
Mixed,
}
/// A piece of text bundled with its script tag and two size measurements.
///
/// The struct itself does not enforce any relationship between the fields;
/// the tests in this file populate `byte_count` from `content.len()` and
/// `char_count` from `content.chars().count()`.
#[derive(Debug, PartialEq, Encode, Decode)]
struct TextSample {
// The text itself.
content: String,
// Script category assigned to `content` by the test that builds the sample.
script: Script,
// Length of `content` in bytes, as filled in by the tests.
byte_count: u32,
// Number of `char`s in `content`, as filled in by the tests.
char_count: u32,
}
/// Encodes an empty `String`, decodes it again, and checks that the decoded
/// value equals the input.
#[test]
fn test_empty_string_roundtrip() {
    let original = String::new();
    let bytes = encode_to_vec(&original).expect("Failed to encode empty string");
    let roundtrip: (String, usize) =
        decode_from_slice(&bytes).expect("Failed to decode empty string");
    assert_eq!(original, roundtrip.0);
}
/// Round-trips a pure-ASCII sentence and checks the decoded `String` matches.
#[test]
fn test_ascii_only_string_roundtrip() {
    let pangram = String::from("The quick brown fox jumps over the lazy dog");
    let wire = encode_to_vec(&pangram).expect("Failed to encode ASCII string");
    let (restored, _consumed): (String, usize) =
        decode_from_slice(&wire).expect("Failed to decode ASCII string");
    assert_eq!(pangram, restored);
}
/// Round-trips a string containing accented Latin letters.
#[test]
fn test_latin_extended_roundtrip() {
    let accented = String::from("Ångström résumé naïve");
    let wire = encode_to_vec(&accented).expect("Failed to encode Latin extended string");
    let outcome: (String, usize) =
        decode_from_slice(&wire).expect("Failed to decode Latin extended string");
    assert_eq!(accented, outcome.0);
}
/// Round-trips a Cyrillic string and checks the decoded value equals the input.
#[test]
fn test_cyrillic_roundtrip() {
    let greeting = String::from("Привет мир");
    let bytes = encode_to_vec(&greeting).expect("Failed to encode Cyrillic string");
    let (restored, _consumed): (String, usize) =
        decode_from_slice(&bytes).expect("Failed to decode Cyrillic string");
    assert_eq!(greeting, restored);
}
/// Round-trips an Arabic string and checks the decoded value equals the input.
#[test]
fn test_arabic_roundtrip() {
    let greeting = String::from("مرحبا بالعالم");
    let bytes = encode_to_vec(&greeting).expect("Failed to encode Arabic string");
    let outcome: (String, usize) =
        decode_from_slice(&bytes).expect("Failed to decode Arabic string");
    assert_eq!(greeting, outcome.0);
}
/// Round-trips a Hebrew string and checks the decoded value equals the input.
#[test]
fn test_hebrew_roundtrip() {
    let greeting = String::from("שלום עולם");
    let wire = encode_to_vec(&greeting).expect("Failed to encode Hebrew string");
    let (restored, _consumed): (String, usize) =
        decode_from_slice(&wire).expect("Failed to decode Hebrew string");
    assert_eq!(greeting, restored);
}
/// Round-trips a short Chinese string and checks the decoded value matches.
#[test]
fn test_chinese_simplified_roundtrip() {
    let greeting = String::from("你好世界");
    let wire = encode_to_vec(&greeting).expect("Failed to encode Chinese string");
    let outcome: (String, usize) =
        decode_from_slice(&wire).expect("Failed to decode Chinese string");
    assert_eq!(greeting, outcome.0);
}
/// Round-trips a Japanese string and checks the decoded value equals the input.
#[test]
fn test_japanese_roundtrip() {
    let greeting = String::from("こんにちは世界");
    let bytes = encode_to_vec(&greeting).expect("Failed to encode Japanese string");
    let (restored, _consumed): (String, usize) =
        decode_from_slice(&bytes).expect("Failed to decode Japanese string");
    assert_eq!(greeting, restored);
}
/// Round-trips a Korean string and checks the decoded value equals the input.
#[test]
fn test_korean_roundtrip() {
    let greeting = String::from("안녕하세요");
    let bytes = encode_to_vec(&greeting).expect("Failed to encode Korean string");
    let outcome: (String, usize) =
        decode_from_slice(&bytes).expect("Failed to decode Korean string");
    assert_eq!(greeting, outcome.0);
}
/// Round-trips a string made only of emoji and checks the decoded value matches.
#[test]
fn test_emoji_roundtrip() {
    let emoji = String::from("🎉🦀🔥💯");
    let wire = encode_to_vec(&emoji).expect("Failed to encode emoji string");
    let (restored, _consumed): (String, usize) =
        decode_from_slice(&wire).expect("Failed to decode emoji string");
    assert_eq!(emoji, restored);
}
/// Round-trips a string that mixes Latin, Chinese, Arabic and emoji text.
#[test]
fn test_mixed_scripts_roundtrip() {
    let mixed = String::from("Hello 世界 مرحبا 🌍");
    let wire = encode_to_vec(&mixed).expect("Failed to encode mixed scripts string");
    let outcome: (String, usize) =
        decode_from_slice(&wire).expect("Failed to decode mixed scripts string");
    assert_eq!(mixed, outcome.0);
}
/// Round-trips a string containing tab, line-feed and carriage-return
/// characters, then checks each of them is still present after decoding.
#[test]
fn test_control_characters_roundtrip() {
    let original = String::from("line1\tcolumn2\nline2\r\nline3");
    let bytes = encode_to_vec(&original).expect("Failed to encode control character string");
    let outcome: (String, usize) =
        decode_from_slice(&bytes).expect("Failed to decode control character string");
    let decoded = outcome.0;
    assert_eq!(original, decoded);
    // Each control character is checked on its own, after the equality check.
    assert!(decoded.contains('\t'));
    assert!(decoded.contains('\n'));
    assert!(decoded.contains('\r'));
}
/// Round-trips a 1000-character string built by repeating a ten-character
/// hiragana sequence 100 times.
#[test]
fn test_very_long_string_roundtrip() {
    let long_text = "あいうえおかきくけこ".repeat(100);
    // Sanity-check the fixture before encoding it.
    assert_eq!(long_text.chars().count(), 1000);

    let bytes = encode_to_vec(&long_text).expect("Failed to encode long string");
    let (restored, _consumed): (String, usize) =
        decode_from_slice(&bytes).expect("Failed to decode long string");
    assert_eq!(long_text, restored);
}
/// Round-trips a string consisting solely of spaces, tabs and line breaks.
#[test]
fn test_whitespace_only_string_roundtrip() {
    let blanks = String::from(" \t\t \n\n\r\n \t ");
    let wire = encode_to_vec(&blanks).expect("Failed to encode whitespace string");
    let outcome: (String, usize) =
        decode_from_slice(&wire).expect("Failed to decode whitespace string");
    assert_eq!(blanks, outcome.0);
}
/// Round-trips a `TextSample` tagged `Script::Latin` and checks both the
/// whole value and the decoded `script` field.
#[test]
fn test_text_sample_latin_roundtrip() {
    let text = String::from("Ångström résumé naïve café");
    let sample = TextSample {
        byte_count: text.len() as u32,
        char_count: text.chars().count() as u32,
        script: Script::Latin,
        // Moved last so the length expressions above can still borrow `text`.
        content: text,
    };

    let wire = encode_to_vec(&sample).expect("Failed to encode Latin TextSample");
    let (restored, _consumed): (TextSample, usize) =
        decode_from_slice(&wire).expect("Failed to decode Latin TextSample");

    assert_eq!(sample, restored);
    assert_eq!(restored.script, Script::Latin);
}
/// Round-trips a `TextSample` tagged `Script::Japanese` and checks that the
/// decoded byte count exceeds the decoded character count.
#[test]
fn test_text_sample_japanese_roundtrip() {
    let text = String::from("こんにちは世界、日本語のテキストです。");
    let bytes_in_text = text.len() as u32;
    let chars_in_text = text.chars().count() as u32;
    let sample = TextSample {
        content: text,
        script: Script::Japanese,
        byte_count: bytes_in_text,
        char_count: chars_in_text,
    };

    let wire = encode_to_vec(&sample).expect("Failed to encode Japanese TextSample");
    let outcome: (TextSample, usize) =
        decode_from_slice(&wire).expect("Failed to decode Japanese TextSample");
    let decoded = outcome.0;

    assert_eq!(sample, decoded);
    assert_eq!(decoded.script, Script::Japanese);
    assert!(decoded.byte_count > decoded.char_count);
}
/// Round-trips a `TextSample` tagged `Script::Emoji`.
///
/// Every character of the fixture is first asserted to be four UTF-8 bytes
/// long, so the final check can require `byte_count == char_count * 4`.
#[test]
fn test_text_sample_emoji_roundtrip() {
    let text = String::from("🎉🦀🔥💯🌍🚀🎊🏆");

    text.chars().for_each(|c| {
        assert_eq!(
            c.len_utf8(),
            4,
            "Expected all emoji to be 4-byte UTF-8, got {:?}",
            c
        );
    });

    let chars_in_text = text.chars().count() as u32;
    let bytes_in_text = text.len() as u32;
    let sample = TextSample {
        content: text,
        script: Script::Emoji,
        byte_count: bytes_in_text,
        char_count: chars_in_text,
    };

    let wire = encode_to_vec(&sample).expect("Failed to encode Emoji TextSample");
    let (restored, _consumed): (TextSample, usize) =
        decode_from_slice(&wire).expect("Failed to decode Emoji TextSample");

    assert_eq!(sample, restored);
    assert_eq!(restored.script, Script::Emoji);
    assert_eq!(restored.byte_count, restored.char_count * 4);
}
/// Round-trips a `Vec<TextSample>` holding five samples with different
/// `Script` tags and checks the decoded vector equals the input.
#[test]
fn test_vec_text_sample_multiple_scripts_roundtrip() {
    // Builds one sample, deriving both counts from the text itself.
    let sample_of = |text: &str, script: Script| {
        let content = text.to_string();
        let byte_count = content.len() as u32;
        let char_count = content.chars().count() as u32;
        TextSample {
            content,
            script,
            byte_count,
            char_count,
        }
    };

    let samples: Vec<TextSample> = vec![
        sample_of("Hello world", Script::Latin),
        sample_of("Привет мир", Script::CyrillicScript),
        sample_of("你好世界", Script::Chinese),
        sample_of("안녕하세요", Script::Korean),
        sample_of("🎉🦀🔥💯🌍", Script::Emoji),
    ];

    let wire = encode_to_vec(&samples).expect("Failed to encode Vec<TextSample>");
    let (restored, _consumed): (Vec<TextSample>, usize) =
        decode_from_slice(&wire).expect("Failed to decode Vec<TextSample>");

    assert_eq!(samples, restored);
    assert_eq!(restored.len(), 5);
}
/// Round-trips an Arabic `TextSample` using the configuration returned by
/// `config::standard().with_big_endian()` for both encoding and decoding.
#[test]
fn test_big_endian_config_text_sample_roundtrip() {
    let text = String::from("مرحبا بالعالم");
    let bytes_in_text = text.len() as u32;
    let chars_in_text = text.chars().count() as u32;
    let sample = TextSample {
        content: text,
        script: Script::Arabic,
        byte_count: bytes_in_text,
        char_count: chars_in_text,
    };

    // The same configuration value is handed to the encoder and the decoder.
    let big_endian = config::standard().with_big_endian();
    let wire = encode_to_vec_with_config(&sample, big_endian)
        .expect("Failed to encode Arabic TextSample (BE)");
    let outcome: (TextSample, usize) = decode_from_slice_with_config(&wire, big_endian)
        .expect("Failed to decode Arabic TextSample (BE)");

    assert_eq!(sample, outcome.0);
}
/// Round-trips a Hebrew `TextSample` using the configuration returned by
/// `config::standard().with_fixed_int_encoding()` for both directions.
#[test]
fn test_fixed_int_config_text_sample_roundtrip() {
    let text = String::from("שלום עולם");
    let sample = TextSample {
        byte_count: text.len() as u32,
        char_count: text.chars().count() as u32,
        script: Script::Hebrew,
        // Moved last so the length expressions above can still borrow `text`.
        content: text,
    };

    // The same configuration value is handed to the encoder and the decoder.
    let fixed_int = config::standard().with_fixed_int_encoding();
    let wire = encode_to_vec_with_config(&sample, fixed_int)
        .expect("Failed to encode Hebrew TextSample (FI)");
    let (restored, _consumed): (TextSample, usize) =
        decode_from_slice_with_config(&wire, fixed_int)
            .expect("Failed to decode Hebrew TextSample (FI)");

    assert_eq!(sample, restored);
}
/// Checks that the `usize` returned by `decode_from_slice` equals the length
/// of the encoded buffer for a short ASCII string.
#[test]
fn test_consumed_bytes_equals_total_encoded_for_ascii() {
    let text = String::from("hello world");
    let wire = encode_to_vec(&text).expect("Failed to encode ASCII for consumed bytes test");
    let outcome: (String, usize) =
        decode_from_slice(&wire).expect("Failed to decode ASCII for consumed bytes test");

    // These two names are captured by the format string below.
    let consumed = outcome.1;
    let total_len = wire.len();
    assert_eq!(
        consumed, total_len,
        "Consumed bytes ({consumed}) should equal total encoded length ({total_len})"
    );
}
/// Verifies that a three-character, ten-byte string reports different
/// character and byte counts, and that both counts survive a round-trip.
#[test]
fn test_unicode_char_count_differs_from_byte_count() {
    let text = String::from("你好🦀");
    let n_chars = text.chars().count();
    let n_bytes = text.len();

    assert_ne!(
        n_chars, n_bytes,
        "char count and byte count must differ for Unicode"
    );
    assert_eq!(n_chars, 3);
    assert_eq!(n_bytes, 10);

    let wire = encode_to_vec(&text).expect("Failed to encode Unicode string");
    let (restored, _consumed): (String, usize) =
        decode_from_slice(&wire).expect("Failed to decode Unicode string");

    assert_eq!(text, restored);
    assert_eq!(restored.chars().count(), 3);
    assert_eq!(restored.len(), 10);
}