use qubit_codec::{
CStringLiteralCodec,
CodecError,
Decoder,
Encoder,
};
/// Verifies decoding of plain text mixed with octal escapes, the simple C
/// escapes, an escaped space, and raw whitespace source characters.
#[test]
fn test_decode_plain_text_and_simple_escapes() {
    // Shorthand for a single decode-and-compare assertion: `$bytes` is the
    // expected output and `$why` is the panic message used if decoding fails.
    macro_rules! assert_decodes {
        ($codec:ident, $literal:expr, $bytes:expr, $why:expr $(,)?) => {
            assert_eq!($bytes.to_vec(), $codec.decode($literal).expect($why))
        };
    }

    let codec = CStringLiteralCodec::new();

    // Literal characters interleaved with three-digit octal escapes.
    assert_decodes!(
        codec,
        r"PK\003\004",
        b"PK\x03\x04",
        "mixed text and octal escapes should decode",
    );

    // \n, \", \\, \t, \a and \b.
    assert_decodes!(
        codec,
        r#"line\nquote\"slash\\tab\tbell\abackspace\b"#,
        b"line\nquote\"slash\\tab\tbell\x07backspace\x08",
        "simple C escapes should decode",
    );

    // \?, \', \v, \f and \r.
    assert_decodes!(
        codec,
        r"\?\'\v\f\r",
        b"?'\x0b\x0c\r",
        "remaining simple escapes should decode",
    );

    // A backslash followed by a space yields a plain space.
    assert_decodes!(
        codec,
        r"<!DOCTYPE\ xbel",
        b"<!DOCTYPE xbel",
        "escaped space should match Java CStringLiteral",
    );

    // Unescaped tab, newline, vertical tab and form feed pass through as-is.
    assert_decodes!(
        codec,
        "\t\n\u{0b}\u{0c}",
        b"\t\n\x0b\x0c",
        "allowed raw whitespace source characters should decode",
    );
}
/// Verifies decoding of `\x`/`\X` hex escapes, octal escapes and the `\u` /
/// `\U` universal escapes, including short forms at the end of the input.
#[test]
fn test_decode_hex_octal_and_universal_escapes() {
    // Each row: (source literal, expected bytes, panic message on decode failure).
    let cases: &[(&str, &[u8], &str)] = &[
        (
            r"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
            &[0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1],
            "hex byte escapes should decode",
        ),
        (
            r"\211PNG\r\n\032\n",
            &[0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n'],
            "freedesktop magic escapes should decode",
        ),
        (
            r"\x41\u0022\U00000021",
            b"A\"!",
            "hex and universal byte escapes should decode",
        ),
        (
            r"\x1Z",
            &[0x01, b'Z'],
            "hex escape should consume at most two hex digits",
        ),
        (
            r"\xA",
            &[0x0a],
            "hex escape should allow one digit at end of input",
        ),
        (
            r"\XB",
            &[0x0b],
            "uppercase hex escape marker should decode",
        ),
        (
            r"\7",
            &[0x07],
            "short octal escape at end of input should decode",
        ),
    ];

    let codec = CStringLiteralCodec::new();
    for &(literal, bytes, why) in cases {
        assert_eq!(bytes.to_vec(), codec.decode(literal).expect(why));
    }
}
/// Replays the fixtures described as "Java" cases: the same greeting written
/// plainly and with quote, hex, octal and universal escapes.
#[test]
fn test_decode_matches_java_c_string_literal_cases() {
    // Four of the fixtures are different spellings of this one byte string.
    let quoted_greeting: &[u8] = b"hello, \"world\".";

    // Each row: (source literal, expected bytes, panic message on decode failure).
    let fixtures: [(&str, &[u8], &str); 6] = [
        (
            "hello, world.",
            b"hello, world.",
            "plain Java fixture should decode",
        ),
        (
            r#"hello, \"world\"."#,
            quoted_greeting,
            "quoted Java fixture should decode",
        ),
        (
            r"hello, \x22world\x22.",
            quoted_greeting,
            "hex Java fixture should decode",
        ),
        (
            r"hello, \42world\42\100123.",
            b"hello, \"world\"@123.",
            "octal Java fixture should decode",
        ),
        (
            r"hello, \u0022world\u0022.",
            quoted_greeting,
            "short universal Java fixture should decode",
        ),
        (
            r"hello, \U00000022world\U00000022.",
            quoted_greeting,
            "long universal Java fixture should decode",
        ),
    ];

    let codec = CStringLiteralCodec::new();
    fixtures.iter().for_each(|&(literal, bytes, why)| {
        assert_eq!(bytes.to_vec(), codec.decode(literal).expect(why));
    });
}
/// Verifies that malformed literals are rejected and that each error carries
/// the expected variant and position.
#[test]
fn test_decode_reports_invalid_escape_and_character_errors() {
    // Decodes with a fresh codec and returns the error; panics with `why`
    // if the literal unexpectedly decodes.
    let decode_failure = |literal: &str, why: &str| {
        CStringLiteralCodec::new()
            .decode(literal)
            .expect_err(why)
    };

    // A lone backslash at the end of the input.
    assert!(matches!(
        decode_failure(r"abc\", "trailing escape marker should fail"),
        CodecError::InvalidEscape { index: 3, .. }
    ));

    // An escape letter that is not supported.
    assert!(matches!(
        decode_failure(r"\z", "unsupported escape should fail"),
        CodecError::InvalidEscape { index: 0, .. }
    ));

    // `\x` must be followed by at least one hex digit.
    assert!(matches!(
        decode_failure(r"\xz", "hex escape without digits should fail"),
        CodecError::InvalidEscape { index: 0, .. }
    ));

    // `\u` with fewer than four digits.
    assert!(matches!(
        decode_failure(r"\u12", "incomplete universal escape should fail"),
        CodecError::InvalidEscape { index: 0, .. }
    ));

    // A non-hex character inside a universal escape is reported as a digit error.
    assert!(matches!(
        decode_failure(r"\u00zz", "invalid universal escape digit should fail"),
        CodecError::InvalidDigit {
            radix: 16,
            index: 4,
            character: 'z'
        }
    ));

    // Non-ASCII source characters are rejected.
    assert!(matches!(
        decode_failure("snowman: ☃", "non-ASCII source character should fail"),
        CodecError::InvalidCharacter {
            index: 9,
            character: '☃',
            ..
        }
    ));

    // A universal escape whose value does not fit in a single byte.
    assert!(matches!(
        decode_failure(r"\u0100", "universal byte escape must fit in one byte"),
        CodecError::InvalidEscape { index: 0, .. }
    ));
}
/// Verifies that encoding emits the simple C escapes where they exist and
/// uppercase `\xHH` escapes for the other non-printable bytes.
#[test]
fn test_encode_uses_simple_escapes_and_hex_bytes() {
    // Each row: (expected literal, bytes to encode).
    let cases: [(&str, &[u8]); 5] = [
        (
            r#"quote\"apos\'question\?slash\\line\n"#,
            b"quote\"apos'question?slash\\line\n",
        ),
        (
            r"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",
            &[0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1],
        ),
        (r"\a\b\f\r\t\v", &[0x07, 0x08, 0x0c, b'\r', b'\t', 0x0b]),
        // Empty input encodes to an empty literal.
        ("", b""),
        (
            r"\x02\x05\x06\x17\x18\x19",
            &[0x02, 0x05, 0x06, 0x17, 0x18, 0x19],
        ),
    ];

    let codec = CStringLiteralCodec::new();
    for (literal, bytes) in cases.iter().copied() {
        assert_eq!(literal, codec.encode(bytes));
    }
}
/// Round-trips a payload through the `Encoder<[u8]>` and `Decoder<str>` trait
/// implementations rather than the codec's own methods.
#[test]
fn test_c_string_literal_codec_can_be_used_through_traits() {
    let payload = b"PK\x03\x04";
    let codec = CStringLiteralCodec::new();

    // Name the traits explicitly so the trait methods are the ones exercised.
    let encoded = <CStringLiteralCodec as Encoder<[u8]>>::encode(&codec, payload)
        .expect("C string literal encode should succeed");
    let decoded = <CStringLiteralCodec as Decoder<str>>::decode(&codec, &encoded)
        .expect("C string literal decode should succeed");

    assert_eq!(r"PK\x03\x04", encoded);
    assert_eq!(payload.to_vec(), decoded);
}