use std::borrow::Cow;
/// An error reported by a raw encoder or decoder while processing input.
pub struct CodecError {
/// Signed offset telling the caller where processing should resume.
///
/// `Encoding::encode_to` / `Encoding::decode_to` add this value to the
/// position at which the failing `raw_feed` call started; for an error
/// coming from `raw_finish` it is added to the total input length instead.
/// The input between the last successfully processed position and the
/// resulting position is what gets handed to the trap.
pub upto: isize,
/// Description of the problem. It becomes the `Err` value returned to the
/// caller when the trap refuses to handle the error.
pub cause: Cow<'static, str>,
}
/// An output sink that accepts bytes; encoders write their output through it.
pub trait ByteWriter {
/// Hints how many bytes are expected to be written next.
/// The default implementation ignores the hint.
fn writer_hint(&mut self, _expectedlen: usize) {}
/// Writes a single byte `b` to the sink.
fn write_byte(&mut self, b: u8);
/// Writes all bytes of `v` to the sink.
fn write_bytes(&mut self, v: &[u8]);
}
/// Lets a plain byte vector serve as a `ByteWriter`: everything written is
/// appended to the end of the vector.
impl ByteWriter for Vec<u8> {
    /// Makes room for `expectedlen` additional bytes up front.
    fn writer_hint(&mut self, expectedlen: usize) {
        self.reserve(expectedlen)
    }

    /// Appends the single byte `b`.
    fn write_byte(&mut self, b: u8) {
        self.push(b)
    }

    /// Appends every byte of `v`, preserving order.
    fn write_bytes(&mut self, v: &[u8]) {
        self.extend_from_slice(v)
    }
}
/// An output sink that accepts characters and strings; decoders write their
/// output through it.
pub trait StringWriter {
/// Hints how much output is expected to be written next.
/// The default implementation ignores the hint.
fn writer_hint(&mut self, _expectedlen: usize) {}
/// Writes a single character `c` to the sink.
fn write_char(&mut self, c: char);
/// Writes the string slice `s` to the sink.
fn write_str(&mut self, s: &str);
}
/// Lets a `String` serve as a `StringWriter`: everything written is appended
/// to the end of the string.
impl StringWriter for String {
    /// Makes room for `expectedlen` additional bytes of output.
    fn writer_hint(&mut self, expectedlen: usize) {
        // `String::reserve` already takes the number of *additional* bytes, so
        // the current length must not be added on top (doing so over-reserves
        // by `self.len()` on every hint).
        self.reserve(expectedlen);
    }

    /// Appends the single character `c`.
    fn write_char(&mut self, c: char) {
        self.push(c);
    }

    /// Appends the string slice `s`.
    fn write_str(&mut self, s: &str) {
        self.push_str(s);
    }
}
/// A stateful, low-level encoder that turns string input into bytes.
///
/// `Encoding::encode_to` drives it: input is fed with `raw_feed`, errors are
/// passed to an `EncoderTrap`, and `raw_finish` is called once feeding
/// completes without error.
pub trait RawEncoder: 'static {
/// Returns a new boxed encoder derived from this one.
/// NOTE(review): presumably a fresh instance with reset state — the test
/// encoder in this file copies its configuration and clears its state.
fn from_self(&self) -> Box<RawEncoder>;
/// Whether ASCII text can be written to the output verbatim.
/// When this returns `true`, `EncoderTrap` writes replacement text as raw
/// bytes instead of feeding it back through `raw_feed`. Defaults to `false`.
fn is_ascii_compatible(&self) -> bool { false }
/// Encodes `input` into `output`.
///
/// Returns `(offset, error)`. The caller treats `offset` as the byte
/// position in `input` up to which processing succeeded; when `error` is
/// `Some`, its `upto` field (relative to the start of `input`) tells where
/// processing should resume.
fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>);
/// Finishes encoding after `raw_feed` consumed the input without error,
/// writing any final output to `output`. Returns an error if one is
/// detected at this point.
fn raw_finish(&mut self, output: &mut ByteWriter) -> Option<CodecError>;
}
/// A stateful, low-level decoder that turns bytes into string output.
///
/// `Encoding::decode_to` drives it: input is fed with `raw_feed`, errors are
/// passed to a `DecoderTrap`, and `raw_finish` is called once feeding
/// completes without error.
pub trait RawDecoder: 'static {
/// Returns a new boxed decoder derived from this one.
/// NOTE(review): presumably a fresh instance with reset state — confirm
/// against the implementations.
fn from_self(&self) -> Box<RawDecoder>;
/// Whether the encoding is ASCII-compatible. Defaults to `false`.
/// NOTE(review): nothing in this file reads this for decoders — confirm
/// the intended use with callers.
fn is_ascii_compatible(&self) -> bool { false }
/// Decodes `input` into `output`.
///
/// Returns `(offset, error)`. The caller treats `offset` as the byte
/// position in `input` up to which processing succeeded; when `error` is
/// `Some`, its `upto` field (relative to the start of `input`) tells where
/// processing should resume.
fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>);
/// Finishes decoding after `raw_feed` consumed the input without error,
/// writing any final output to `output`. Returns an error if one is
/// detected at this point (for instance when input is left unprocessed).
fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError>;
}
/// A `'static` reference to an encoding object that is also `Send + Sync`,
/// so it can be freely copied and shared between threads.
pub type EncodingRef = &'static (Encoding + Send + Sync);
/// A character encoding: a named factory for raw encoders and decoders, plus
/// convenience methods that run them over a complete input under the control
/// of an error trap.
pub trait Encoding {
    /// Returns the name of this encoding.
    fn name(&self) -> &'static str;

    /// Returns the WHATWG name of this encoding, if it has one.
    /// The default implementation returns `None`.
    fn whatwg_name(&self) -> Option<&'static str> {
        None
    }

    /// Creates a new raw encoder for this encoding.
    fn raw_encoder(&self) -> Box<RawEncoder>;

    /// Creates a new raw decoder for this encoding.
    fn raw_decoder(&self) -> Box<RawDecoder>;

    /// Encodes `input` into a freshly allocated byte vector.
    ///
    /// Errors reported by the encoder are handed to `trap`; if the trap
    /// declines to handle one, its cause is returned as `Err`.
    fn encode(&self, input: &str, trap: EncoderTrap) -> Result<Vec<u8>, Cow<'static, str>> {
        let mut bytes: Vec<u8> = Vec::new();
        match self.encode_to(input, trap, &mut bytes) {
            Ok(()) => Ok(bytes),
            Err(cause) => Err(cause),
        }
    }

    /// Encodes `input`, writing the result to `ret`.
    ///
    /// Errors reported by the encoder are handed to `trap` together with the
    /// offending slice of `input`; if the trap returns `false` the error's
    /// cause is returned as `Err` (output written so far stays in `ret`).
    fn encode_to(&self, input: &str, trap: EncoderTrap, ret: &mut ByteWriter)
        -> Result<(), Cow<'static, str>>
    {
        let mut encoder = self.raw_encoder();
        // Position in `input` from which the next `raw_feed` call starts.
        let mut cursor = 0;
        loop {
            let (consumed, feed_error) = encoder.raw_feed(&input[cursor..], ret);
            // Everything before this position has been encoded successfully.
            let problem_start = cursor + consumed;

            if let Some(err) = feed_error {
                // `upto` is relative to the slice that was just fed.
                cursor = (cursor as isize + err.upto) as usize;
                if !trap.trap(&mut *encoder, &input[problem_start..cursor], ret) {
                    return Err(err.cause);
                }
                continue;
            }

            // No error while feeding: the input is exhausted, so finish up.
            cursor = input.len();
            if let Some(err) = encoder.raw_finish(ret) {
                // Here `upto` is relative to the end of the input.
                cursor = (cursor as isize + err.upto) as usize;
                if !trap.trap(&mut *encoder, &input[problem_start..cursor], ret) {
                    return Err(err.cause);
                }
            }
            if cursor >= input.len() {
                return Ok(());
            }
        }
    }

    /// Decodes `input` into a freshly allocated `String`.
    ///
    /// Errors reported by the decoder are handed to `trap`; if the trap
    /// declines to handle one, its cause is returned as `Err`.
    fn decode(&self, input: &[u8], trap: DecoderTrap) -> Result<String, Cow<'static, str>> {
        let mut text = String::new();
        match self.decode_to(input, trap, &mut text) {
            Ok(()) => Ok(text),
            Err(cause) => Err(cause),
        }
    }

    /// Decodes `input`, writing the result to `ret`.
    ///
    /// Errors reported by the decoder are handed to `trap` together with the
    /// offending slice of `input`; if the trap returns `false` the error's
    /// cause is returned as `Err` (output written so far stays in `ret`).
    fn decode_to(&self, input: &[u8], trap: DecoderTrap, ret: &mut StringWriter)
        -> Result<(), Cow<'static, str>>
    {
        let mut decoder = self.raw_decoder();
        // Position in `input` from which the next `raw_feed` call starts.
        let mut cursor = 0;
        loop {
            let (consumed, feed_error) = decoder.raw_feed(&input[cursor..], ret);
            // Everything before this position has been decoded successfully.
            let problem_start = cursor + consumed;

            if let Some(err) = feed_error {
                // `upto` is relative to the slice that was just fed.
                cursor = (cursor as isize + err.upto) as usize;
                if !trap.trap(&mut *decoder, &input[problem_start..cursor], ret) {
                    return Err(err.cause);
                }
                continue;
            }

            // No error while feeding: the input is exhausted, so finish up.
            cursor = input.len();
            if let Some(err) = decoder.raw_finish(ret) {
                // Here `upto` is relative to the end of the input.
                cursor = (cursor as isize + err.upto) as usize;
                if !trap.trap(&mut *decoder, &input[problem_start..cursor], ret) {
                    return Err(err.cause);
                }
            }
            if cursor >= input.len() {
                return Ok(());
            }
        }
    }
}
/// Signature of a user-supplied handler for `EncoderTrap::Call`.
/// It receives the encoder, the problematic part of the input and the output
/// sink, and returns `true` if the error was handled (`false` makes the
/// encoding fail).
pub type EncoderTrapFunc =
extern "Rust" fn(encoder: &mut RawEncoder, input: &str, output: &mut ByteWriter) -> bool;
/// Signature of a user-supplied handler for `DecoderTrap::Call`.
/// It receives the decoder, the problematic part of the input and the output
/// sink, and returns `true` if the error was handled (`false` makes the
/// decoding fail).
pub type DecoderTrapFunc =
extern "Rust" fn(decoder: &mut RawDecoder, input: &[u8], output: &mut StringWriter) -> bool;
/// What to do when a decoder reports an error.
#[derive(Copy)]
pub enum DecoderTrap {
/// Do not handle the error: `trap` returns `false`, which makes decoding
/// fail with the error's cause.
Strict,
/// Write U+FFFD (the replacement character) to the output and carry on.
Replace,
/// Write nothing and carry on, silently dropping the problematic input.
Ignore,
/// Delegate to a custom handler; its return value decides whether
/// decoding carries on (`true`) or fails (`false`).
Call(DecoderTrapFunc),
}
impl DecoderTrap {
    /// Handles a decoder error according to this trap.
    ///
    /// `input` is the problematic byte sequence and `output` receives any
    /// replacement text. Returns `true` when the error was dealt with and
    /// decoding may continue, `false` when the caller should give up.
    pub fn trap(&self, decoder: &mut RawDecoder, input: &[u8], output: &mut StringWriter) -> bool {
        match *self {
            // Custom handler: it alone decides what is written and returned.
            DecoderTrap::Call(handler) => handler(decoder, input, output),
            DecoderTrap::Strict => false,
            DecoderTrap::Ignore => true,
            DecoderTrap::Replace => {
                output.write_char('\u{fffd}');
                true
            }
        }
    }
}
impl Clone for DecoderTrap {
fn clone(&self) -> DecoderTrap {
match *self {
DecoderTrap::Strict => DecoderTrap::Strict,
DecoderTrap::Replace => DecoderTrap::Replace,
DecoderTrap::Ignore => DecoderTrap::Ignore,
DecoderTrap::Call(f) => DecoderTrap::Call(f),
}
}
}
/// What to do when an encoder reports an error.
#[derive(Copy)]
pub enum EncoderTrap {
/// Do not handle the error: `trap` returns `false`, which makes encoding
/// fail with the error's cause.
Strict,
/// Write an encoded `?` to the output and carry on.
/// Panics if the encoder cannot encode the replacement.
Replace,
/// Write nothing and carry on, silently dropping the problematic input.
Ignore,
/// Write a decimal numeric character reference (`&#N;`) for every
/// character of the problematic input and carry on.
/// Panics if the encoder cannot encode the replacement.
NcrEscape,
/// Delegate to a custom handler; its return value decides whether
/// encoding carries on (`true`) or fails (`false`).
Call(EncoderTrapFunc),
}
impl EncoderTrap {
    /// Handles an encoder error according to this trap.
    ///
    /// `input` is the problematic part of the original string and `output`
    /// receives any replacement bytes. Returns `true` when the error was dealt
    /// with and encoding may continue, `false` when the caller should give up.
    ///
    /// # Panics
    ///
    /// `Replace` and `NcrEscape` panic when the encoder is not ASCII-compatible
    /// and itself fails to encode the (ASCII) replacement string.
    pub fn trap(&self, encoder: &mut RawEncoder, input: &str, output: &mut ByteWriter) -> bool {
        // Writes the ASCII replacement `input` to `output`: verbatim when the
        // encoder is ASCII-compatible, otherwise by feeding it back through
        // the encoder. Always returns `true` (or panics, see above).
        fn reencode(encoder: &mut RawEncoder, input: &str, output: &mut ByteWriter,
                    trapname: &str) -> bool {
            if encoder.is_ascii_compatible() {
                output.write_bytes(input.as_bytes());
            } else {
                let (_, err) = encoder.raw_feed(input, output);
                if err.is_some() {
                    panic!("{} cannot reencode a replacement string", trapname);
                }
            }
            true
        }

        match *self {
            EncoderTrap::Strict => false,
            EncoderTrap::Replace => reencode(encoder, "?", output, "Replace"),
            EncoderTrap::Ignore => true,
            EncoderTrap::NcrEscape => {
                use std::fmt::Write;

                // Format each reference straight into the buffer instead of
                // allocating a temporary `String` per character.
                let mut escapes = String::new();
                for ch in input.chars() {
                    write!(escapes, "&#{};", ch as u32)
                        .expect("formatting into a String cannot fail");
                }
                reencode(encoder, &escapes, output, "NcrEscape")
            }
            EncoderTrap::Call(func) => func(encoder, input, output),
        }
    }
}
impl Clone for EncoderTrap {
fn clone(&self) -> EncoderTrap {
match *self {
EncoderTrap::Strict => EncoderTrap::Strict,
EncoderTrap::Replace => EncoderTrap::Replace,
EncoderTrap::Ignore => EncoderTrap::Ignore,
EncoderTrap::NcrEscape => EncoderTrap::NcrEscape,
EncoderTrap::Call(f) => EncoderTrap::Call(f),
}
}
}
/// Decodes `input`, picking the encoding from a leading byte order mark.
///
/// If `input` starts with the UTF-8, UTF-16BE or UTF-16LE byte order mark, the
/// mark is stripped and the rest is decoded with that encoding; otherwise the
/// whole input is decoded with `fallback_encoding`. Decoding errors are
/// handled by `trap`.
///
/// Returns the decoding result together with the encoding that was used.
pub fn decode(input: &[u8], trap: DecoderTrap, fallback_encoding: EncodingRef)
           -> (Result<String, Cow<'static, str>>, EncodingRef) {
    use all::{UTF_8, UTF_16LE, UTF_16BE};

    const BOM_UTF_8: &'static [u8] = &[0xEF, 0xBB, 0xBF];
    const BOM_UTF_16BE: &'static [u8] = &[0xFE, 0xFF];
    const BOM_UTF_16LE: &'static [u8] = &[0xFF, 0xFE];

    if input.starts_with(BOM_UTF_8) {
        let body = &input[BOM_UTF_8.len()..];
        return (UTF_8.decode(body, trap), UTF_8 as EncodingRef);
    }
    if input.starts_with(BOM_UTF_16BE) {
        let body = &input[BOM_UTF_16BE.len()..];
        return (UTF_16BE.decode(body, trap), UTF_16BE as EncodingRef);
    }
    if input.starts_with(BOM_UTF_16LE) {
        let body = &input[BOM_UTF_16LE.len()..];
        return (UTF_16LE.decode(body, trap), UTF_16LE as EncodingRef);
    }

    // No recognised byte order mark: trust the caller's choice.
    (fallback_encoding.decode(input, trap), fallback_encoding)
}
#[cfg(test)]
mod tests {
    use super::*;
    use super::EncoderTrap::NcrEscape;
    use util::StrCharIndex;
    use std::convert::Into;

    // A contrived encoder for exercising the `NcrEscape` trap:
    // - `flag` is what `is_ascii_compatible` reports;
    // - `prohibit` is an extra character that is rejected (besides everything
    //   above U+007F);
    // - `prepend` is written before every character while `toggle` is set;
    // - `toggle` flips each time an `e` is encoded, which makes the encoder
    //   stateful and its output observably different from plain ASCII.
    struct MyEncoder { flag: bool, prohibit: char, prepend: &'static str, toggle: bool }

    impl RawEncoder for MyEncoder {
        fn from_self(&self) -> Box<RawEncoder> {
            Box::new(MyEncoder { flag: self.flag,
                                 prohibit: self.prohibit,
                                 prepend: self.prepend,
                                 toggle: false })
        }

        fn is_ascii_compatible(&self) -> bool { self.flag }

        fn raw_feed(&mut self, input: &str,
                    output: &mut ByteWriter) -> (usize, Option<CodecError>) {
            // `index_iter` presumably yields ((start, end), char) with byte
            // offsets into `input` — see `util::StrCharIndex`.
            for ((i,j), ch) in input.index_iter() {
                if ch <= '\u{7f}' && ch != self.prohibit {
                    if self.toggle && !self.prepend.is_empty() {
                        output.write_bytes(self.prepend.as_bytes());
                    }
                    output.write_byte(ch as u8);
                    if ch == 'e' {
                        self.toggle = !self.toggle;
                    }
                } else {
                    return (i, Some(CodecError { upto: j as isize,
                                                 cause: "!!!".into() }));
                }
            }
            (input.len(), None)
        }

        fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> { None }
    }

    // The encoding that hands out `MyEncoder`s; decoding is not supported.
    struct MyEncoding { flag: bool, prohibit: char, prepend: &'static str }

    impl Encoding for MyEncoding {
        fn name(&self) -> &'static str { "my encoding" }

        fn raw_encoder(&self) -> Box<RawEncoder> {
            Box::new(MyEncoder { flag: self.flag,
                                 prohibit: self.prohibit,
                                 prepend: self.prepend,
                                 toggle: false })
        }

        fn raw_decoder(&self) -> Box<RawDecoder> { panic!("not supported") }
    }

    // With no prefix, writing the escape verbatim and re-encoding it through
    // the encoder give the same bytes.
    #[test]
    fn test_reencoding_trap_with_ascii_compatible_encoding() {
        static COMPAT: &'static MyEncoding =
            &MyEncoding { flag: true, prohibit: '\u{80}', prepend: "" };
        static INCOMPAT: &'static MyEncoding =
            &MyEncoding { flag: false, prohibit: '\u{80}', prepend: "" };

        assert_eq!(COMPAT.encode("Hello\u{203d} I'm fine.", NcrEscape),
                   Ok(b"Hello&#8253; I'm fine.".to_vec()));
        assert_eq!(INCOMPAT.encode("Hello\u{203d} I'm fine.", NcrEscape),
                   Ok(b"Hello&#8253; I'm fine.".to_vec()));
    }

    // With a prefix, the ASCII-compatible encoder gets the escape written
    // verbatim, while the incompatible one re-encodes it (and so prefixes
    // every character of the escape as well).
    #[test]
    fn test_reencoding_trap_with_ascii_incompatible_encoding() {
        static COMPAT: &'static MyEncoding =
            &MyEncoding { flag: true, prohibit: '\u{80}', prepend: "*" };
        static INCOMPAT: &'static MyEncoding =
            &MyEncoding { flag: false, prohibit: '\u{80}', prepend: "*" };

        assert_eq!(COMPAT.encode("Hello\u{203d} I'm fine.", NcrEscape),
                   Ok(b"He*l*l*o&#8253;* *I*'*m* *f*i*n*e.".to_vec()));
        assert_eq!(INCOMPAT.encode("Hello\u{203d} I'm fine.", NcrEscape),
                   Ok(b"He*l*l*o*&*#*8*2*5*3*;* *I*'*m* *f*i*n*e.".to_vec()));
    }

    // Re-encoding the escape must panic when the encoder rejects `&`.
    #[test]
    #[should_panic]
    fn test_reencoding_trap_can_fail() {
        static FAIL: &'static MyEncoding = &MyEncoding { flag: false, prohibit: '&', prepend: "" };

        let _ = FAIL.encode("Hello\u{203d} I'm fine.", NcrEscape);
    }
}