use super::utf8::MAX_RUNE;
use super::{decode_rune, full_rune, RUNE_ERROR};
use crate::unicode;
/// Checks that this module's constants agree with their `unicode` counterparts.
#[test]
fn test_constants() {
    let (got, want) = (super::MAX_RUNE, unicode::MAX_RUNE);
    assert_eq!(got, want, "utf8::MAX_RUNE is wrong");

    let (got, want) = (super::RUNE_ERROR, unicode::REPLACEMENT_CHAR);
    assert_eq!(got, want, "utf8.RUNE_ERROR is wrong");
}
/// One row of the encoding table: a code point together with the byte
/// sequence the tests expect for it.
#[derive(Debug)]
struct Utf8Map {
    /// The code point.
    r: char,
    /// The bytes the table pairs with `r`.
    encoded: &'static [u8],
}

impl Utf8Map {
    /// Builds a table row; `const` so it can be used in `static` initializers.
    const fn new(r: char, encoded: &'static [u8]) -> Self {
        Utf8Map { r, encoded }
    }
}
/// Code points paired with the byte sequence the encode/decode tests expect
/// for each of them. Rows are grouped below by the length of the byte
/// sequence; the order of rows is kept exactly as found.
static UTF8MAP: &[Utf8Map] = &[
// One-byte sequences.
Utf8Map::new('\u{0000}', b"\x00"),
Utf8Map::new('\u{0001}', b"\x01"),
Utf8Map::new('\u{007e}', b"\x7e"),
Utf8Map::new('\u{007f}', b"\x7f"),
// Two-byte sequences.
Utf8Map::new('\u{0080}', b"\xc2\x80"),
Utf8Map::new('\u{0081}', b"\xc2\x81"),
Utf8Map::new('\u{00bf}', b"\xc2\xbf"),
Utf8Map::new('\u{00c0}', b"\xc3\x80"),
Utf8Map::new('\u{00c1}', b"\xc3\x81"),
Utf8Map::new('\u{00c8}', b"\xc3\x88"),
Utf8Map::new('\u{00d0}', b"\xc3\x90"),
Utf8Map::new('\u{00e0}', b"\xc3\xa0"),
Utf8Map::new('\u{00f0}', b"\xc3\xb0"),
Utf8Map::new('\u{00f8}', b"\xc3\xb8"),
Utf8Map::new('\u{00ff}', b"\xc3\xbf"),
Utf8Map::new('\u{0100}', b"\xc4\x80"),
// Note: U+07FF is listed before U+0400, so this group is not in ascending order.
Utf8Map::new('\u{07ff}', b"\xdf\xbf"),
Utf8Map::new('\u{0400}', b"\xd0\x80"),
// Three-byte sequences.
Utf8Map::new('\u{0800}', b"\xe0\xa0\x80"),
Utf8Map::new('\u{0801}', b"\xe0\xa0\x81"),
Utf8Map::new('\u{1000}', b"\xe1\x80\x80"),
Utf8Map::new('\u{d000}', b"\xed\x80\x80"),
Utf8Map::new('\u{d7ff}', b"\xed\x9f\xbf"),
Utf8Map::new('\u{e000}', b"\xee\x80\x80"),
Utf8Map::new('\u{fffe}', b"\xef\xbf\xbe"),
Utf8Map::new('\u{ffff}', b"\xef\xbf\xbf"),
// Four-byte sequences.
Utf8Map::new('\u{10000}', b"\xf0\x90\x80\x80"),
Utf8Map::new('\u{10001}', b"\xf0\x90\x80\x81"),
Utf8Map::new('\u{40000}', b"\xf1\x80\x80\x80"),
Utf8Map::new('\u{10fffe}', b"\xf4\x8f\xbf\xbe"),
Utf8Map::new('\u{10ffff}', b"\xf4\x8f\xbf\xbf"),
// U+FFFD (three bytes), listed last on its own.
Utf8Map::new('\u{FFFD}', b"\xef\xbf\xbd"),
];
/// A byte sequence that the decoding tests expect to be rejected.
struct InvalidUtf8Map {
// Numeric value associated with the bytes; not read by the code visible in
// this file (the leading underscore silences the unused-field warning).
_r: u32,
// The bytes handed to the decoder.
encoded: &'static [u8],
}
/// Byte sequences labelled with 0xD800 and 0xDFFF, which
/// `test_decode_surrogate_rune` expects `decode_rune` to reject.
static SURROGATE_MAP: [InvalidUtf8Map; 2] = [
// Entry labelled 0xD800.
InvalidUtf8Map {
_r: 0xd800,
encoded: b"\xed\xa0\x80",
},
// Entry labelled 0xDFFF.
InvalidUtf8Map {
_r: 0xdfff,
encoded: b"\xed\xbf\xbf",
},
];
/// Sample strings that `test_sequencing` combines with every `UTF8MAP` entry:
/// empty, ASCII-only, symbols, and text mixing characters of different
/// encoded widths.
const TEST_STRINGS: &[&str] = &[
    "",
    "abcd",
    "☺☻☹",
    "日a本b語ç日ð本Ê語þ日¥本¼語i日©",
    // The previous entry three times over, one literal per repetition.
    concat!(
        "日a本b語ç日ð本Ê語þ日¥本¼語i日©",
        "日a本b語ç日ð本Ê語þ日¥本¼語i日©",
        "日a本b語ç日ð本Ê語þ日¥本¼語i日©"
    ),
];
/// Checks `full_rune` against the encoding table and against two lone bytes.
///
/// For every `UTF8MAP` row the complete byte sequence must be reported as a
/// full rune, and the same sequence with its last byte removed must not.
#[test]
fn test_full_rune() {
    for m in UTF8MAP {
        let b = m.encoded;
        assert!(
            full_rune(b),
            "full_rune({:?}) ({:?}) = false, want true",
            b,
            m.r
        );

        // Dropping the final byte must leave an incomplete sequence.
        let b1 = &b[..b.len() - 1];
        assert!(!full_rune(b1), "full_rune({:?}) = true, want false", b1);
    }

    // These single bytes are expected to count as "full" on their own. The
    // check does not depend on the table, so it runs once instead of once per
    // table row.
    for s in [b"\xc0", b"\xc1"] {
        let b = &s[..];
        assert!(full_rune(b), "full_rune({:?}) = false, want true", s);
    }
}
/// Encodes every code point in `UTF8MAP` and compares the bytes written with
/// the table's expected sequence.
#[test]
fn test_encode_rune() {
    for entry in UTF8MAP.iter() {
        // Scratch space larger than any sequence in the table.
        let mut scratch = [0_u8; 10];
        let written = super::encode_rune(&mut scratch, u32::from(entry.r));
        let got = &scratch[..written];
        assert_eq!(
            entry.encoded, got,
            "encode_rune(%{:?}) = {:?} want {:?}",
            entry.r, got, entry.encoded
        );
    }
}
/// Exercises `decode_rune` on every `UTF8MAP` row in four ways: the exact
/// sequence, the sequence followed by an extra byte, the sequence with its
/// last byte missing, and the sequence with its last byte corrupted.
#[test]
fn test_decode_rune() {
    for entry in UTF8MAP {
        let encoded = entry.encoded;
        let full_len = encoded.len();

        // Exact sequence: expect the table's rune and the full length.
        let (r, size) = decode_rune(encoded);
        assert!(
            r == entry.r && size == full_len,
            "decode_rune({:?}) = {:?}, {} want {:?}, {}",
            encoded,
            r,
            size,
            entry.r,
            full_len
        );

        // An extra trailing byte must not change the rune or the size.
        let mut padded = encoded.to_vec();
        padded.push(0);
        let (r, size) = decode_rune(&padded);
        assert!(
            r == entry.r && size == full_len,
            "decode_rune({:?}) = {:?}, {} want {:?}, {}",
            padded,
            r,
            size,
            entry.r,
            full_len
        );

        // Last byte missing: expect RUNE_ERROR, with size 0 when nothing is
        // left to decode and 1 otherwise.
        let wantsize = if full_len <= 1 { 0 } else { 1 };
        let truncated = &encoded[..full_len - 1];
        let (r, size) = decode_rune(truncated);
        assert!(
            r == RUNE_ERROR && size == wantsize,
            "decode_rune({:?}) = %{:?}, {} want %{:?}, {}",
            truncated,
            r,
            size,
            RUNE_ERROR,
            wantsize
        );

        // Last byte overwritten (0x80 for a single-byte sequence, 0x7F
        // otherwise): expect RUNE_ERROR with size 1.
        let mut corrupted = encoded.to_vec();
        let last = corrupted.len() - 1;
        corrupted[last] = if last == 0 { 0x80 } else { 0x7F };
        let (r, size) = decode_rune(&corrupted);
        assert!(
            r == RUNE_ERROR && size == 1,
            "decode_rune({:?}) = %{:?}, {} want %{:?}, {}",
            corrupted,
            r,
            size,
            RUNE_ERROR,
            1
        );
    }
}
/// Every `SURROGATE_MAP` entry must decode to `RUNE_ERROR` with a size of 1.
#[test]
fn test_decode_surrogate_rune() {
    for entry in SURROGATE_MAP.iter() {
        let encoded = entry.encoded;
        let (r, size) = decode_rune(encoded);
        let rejected = r == RUNE_ERROR && size == 1;
        assert!(
            rejected,
            "decode_rune({:?}) = {}, {} want {}, {}",
            encoded,
            r,
            size,
            RUNE_ERROR,
            1,
        );
    }
}
/// Runs `test_sequence` over every sample string combined with every table
/// entry: entry appended, entry prepended, and entry in the middle.
#[test]
fn test_sequencing() {
    for ts in TEST_STRINGS {
        for m in UTF8MAP {
            // Text form of the table entry, shared by all three combinations.
            let piece = String::from_utf8_lossy(m.encoded);
            let candidates = [
                format!("{}{}", ts, piece),
                format!("{}{}", piece, ts),
                format!("{}{}{}", ts, piece, ts),
            ];
            for s in &candidates {
                test_sequence(s);
            }
        }
    }
}
/// Four-byte inputs for which `test_decode_invalid_sequence` expects
/// `decode_rune` to report `RUNE_ERROR`. Grouped by first byte.
const INVALID_SEQUENCE_TESTS: &[&[u8]] = &[
    // First byte 0xED, second byte 0xA0 / 0xBF.
    b"\xED\xA0\x80\x80",
    b"\xED\xBF\xBF\x80",
    // First byte 0x91.
    b"\x91\x80\x80\x80",
    // First byte 0xC2 / 0xDF.
    b"\xC2\x7F\x80\x80",
    b"\xC2\xC0\x80\x80",
    b"\xDF\x7F\x80\x80",
    b"\xDF\xC0\x80\x80",
    // First byte 0xE0.
    b"\xE0\x9F\xBF\x80",
    b"\xE0\xA0\x7F\x80",
    b"\xE0\xBF\xC0\x80",
    b"\xE0\xC0\x80\x80",
    // First byte 0xE1.
    b"\xE1\x7F\xBF\x80",
    b"\xE1\x80\x7F\x80",
    b"\xE1\xBF\xC0\x80",
    b"\xE1\xC0\x80\x80",
    // First byte 0xED.
    b"\xED\x7F\xBF\x80",
    b"\xED\x80\x7F\x80",
    b"\xED\x9F\xC0\x80",
    b"\xED\xA0\x80\x80",
    // First byte 0xF0.
    b"\xF0\x8F\xBF\xBF",
    b"\xF0\x90\x7F\xBF",
    b"\xF0\x90\x80\x7F",
    b"\xF0\xBF\xBF\xC0",
    b"\xF0\xBF\xC0\x80",
    b"\xF0\xC0\x80\x80",
    // First byte 0xF1.
    b"\xF1\x7F\xBF\xBF",
    b"\xF1\x80\x7F\xBF",
    b"\xF1\x80\x80\x7F",
    b"\xF1\xBF\xBF\xC0",
    b"\xF1\xBF\xC0\x80",
    b"\xF1\xC0\x80\x80",
    // First byte 0xF4.
    b"\xF4\x7F\xBF\xBF",
    b"\xF4\x80\x7F\xBF",
    b"\xF4\x80\x80\x7F",
    b"\xF4\x8F\xBF\xC0",
    b"\xF4\x8F\xC0\x80",
    b"\xF4\x90\x80\x80",
];
/// Every entry of `INVALID_SEQUENCE_TESTS` must decode to `RUNE_ERROR`; the
/// reported size is not checked here.
#[test]
fn test_decode_invalid_sequence() {
    for s in INVALID_SEQUENCE_TESTS.iter() {
        let r1 = decode_rune(s).0;
        assert_eq!(
            r1, RUNE_ERROR,
            "decode_rune({:?}) = {}, want {}",
            s, r1, RUNE_ERROR
        );
    }
}
/// Walks `s` front to back with `decode_rune` and compares each result with
/// the standard library's own iteration over the same string.
///
/// For every character the running byte offset (the sum of the sizes reported
/// so far) must equal the character's real offset, and the decoded rune must
/// equal the character. After the last character the offset must equal the
/// byte length of `s`.
///
/// Panics (fails the calling test) on the first mismatch.
fn test_sequence(s: &str) {
    let b = s.as_bytes();
    let mut si = 0;
    for (i, r) in s.char_indices() {
        // A wrong size from an earlier step shows up here as a drifted offset.
        assert_eq!(
            si, i,
            "sequence({:?}) mismatched index {}, want {}",
            s, si, i
        );
        let (r1, size1) = super::decode_rune(&b[si..]);
        assert_eq!(
            r,
            r1,
            "decode_rune({:?}) = %{:?}, want %{:?}",
            &s[si..],
            r1,
            r
        );
        si += size1;
    }
    // Covers the size reported for the final character as well.
    assert_eq!(
        si,
        b.len(),
        "sequence({:?}) consumed {} bytes, want {}",
        s,
        si,
        b.len()
    );
}
/// One `rune_count` case: input bytes and the count expected for them.
struct RuneCountTest {
    /// Bytes passed to `rune_count`.
    input: &'static [u8],
    /// Expected result.
    out: usize,
}

impl RuneCountTest {
    /// Builds a case; `const` so it can be used in `static` initializers.
    const fn new(input: &'static [u8], out: usize) -> Self {
        RuneCountTest { input, out }
    }
}
/// Inputs for `test_rune_count`, each paired with the count `rune_count` is
/// expected to return.
static RUNE_COUNT_TESTS: &[RuneCountTest] = &[
RuneCountTest::new(b"abcd", 4),
// Three characters, each longer than one byte.
RuneCountTest::new("☺☻☹".as_bytes(), 3),
RuneCountTest::new(b"1,2,3,4", 7),
// In the remaining rows the expected count equals the number of bytes.
RuneCountTest::new(b"\xe2\x00", 2),
RuneCountTest::new(b"\xe2\x80", 2),
RuneCountTest::new(b"a\xe2\x80", 3),
];
/// Runs `rune_count` over every `RUNE_COUNT_TESTS` row and compares the
/// result with the expected count.
#[test]
fn test_rune_count() {
    for case in RUNE_COUNT_TESTS.iter() {
        let (input, want) = (case.input, case.out);
        let got = super::rune_count(input);
        assert_eq!(
            got, want,
            "rune_count({:?}) = {:?}, want {:?}",
            input, got, want
        );
    }
}
/// One `rune_len` case: a numeric code point value and the expected length.
struct RuneLenTest {
    /// Value passed to `rune_len`.
    r: u32,
    /// Expected result; signed because the table uses -1 for some rows.
    size: isize,
}

impl RuneLenTest {
    /// Builds a case; `const` so it can be used in `static` initializers.
    const fn new(r: u32, size: isize) -> Self {
        RuneLenTest { r, size }
    }
}
/// Inputs for `test_rune_len`, each paired with the length `rune_len` is
/// expected to return.
// NOTE(review): the name reads like a mis-split "RUNE_LEN_TESTS"; renaming it
// would also require updating `test_rune_len`.
static RUN_ELEN_TESTS: &[RuneLenTest] = &[
// Values with an expected length of 1 to 4.
RuneLenTest::new(0, 1),
RuneLenTest::new('e' as u32, 1),
RuneLenTest::new('é' as u32, 2),
RuneLenTest::new('☺' as u32, 3),
RuneLenTest::new(RUNE_ERROR as u32, 3),
RuneLenTest::new(MAX_RUNE as u32, 4),
// Values with an expected result of -1: 0xD800, 0xDFFF, and one past MAX_RUNE.
RuneLenTest::new(0xD800, -1),
RuneLenTest::new(0xDFFF, -1),
RuneLenTest::new(MAX_RUNE as u32 + 1, -1),
];
/// Runs `rune_len` over every `RUN_ELEN_TESTS` row and compares the result
/// with the expected length.
#[test]
fn test_rune_len() {
    for case in RUN_ELEN_TESTS.iter() {
        let (rune, want) = (case.r, case.size);
        let got = super::rune_len(rune);
        assert_eq!(
            got, want,
            "rune_len({:?}) = {}, want {}",
            rune, got, want
        );
    }
}