encoding_rs 0.6.4

A Gecko-oriented implementation of the Encoding Standard
Documentation
// Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use handles::*;
use data::*;
use variant::*;
use super::*;
// Rust 1.14.0 requires the following despite the asterisk above.
use super::in_range16;
use super::in_inclusive_range16;

/// Decoder state for EUC-KR input.
///
/// Instances are created through `EucKrDecoder::new`, which wraps the
/// struct in `VariantDecoder::EucKr`.
pub struct EucKrDecoder {
    // When this is `Some(_)`, the buffer-length estimates count one extra
    // input byte (see `plus_one_if_lead`).
    // NOTE(review): presumably a lead byte left pending from a previous call
    // by the macro-generated decode code -- confirm against the macro
    // definition.
    lead: Option<u8>,
}

impl EucKrDecoder {
    /// Creates a decoder whose `lead` is `None` and returns it wrapped in
    /// the `VariantDecoder::EucKr` variant.
    pub fn new() -> VariantDecoder {
        let decoder = EucKrDecoder { lead: None };
        VariantDecoder::EucKr(decoder)
    }

    /// Returns `byte_length`, plus one when `self.lead` holds a byte.
    ///
    /// Yields `None` if the addition overflows `usize`.
    fn plus_one_if_lead(&self, byte_length: usize) -> Option<usize> {
        let pending = if self.lead.is_some() { 1 } else { 0 };
        byte_length.checked_add(pending)
    }

    /// Worst-case UTF-16 output length for `byte_length` input bytes.
    ///
    /// The result is `byte_length`, plus one when `self.lead` holds a byte
    /// (see `plus_one_if_lead`); `None` if that addition overflows `usize`.
    pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
        self.plus_one_if_lead(byte_length)
    }

    /// Worst-case UTF-8 output length for `byte_length` input bytes when
    /// malformed sequences are not replaced.
    ///
    /// With `len` being `byte_length` plus one for a pending lead, the
    /// result is `len + (len + 1) / 2`.
    /// NOTE(review): this assumes the `checked_*` helpers from the parent
    /// module are overflow-checked `Option` arithmetic yielding `None` on
    /// overflow -- confirm against their definitions.
    pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
        // worst case: 2 to 3 (two input bytes become three UTF-8 bytes),
        // so budget the input length plus half of it, rounded up.
        let total = self.plus_one_if_lead(byte_length);
        let rounded_up = checked_add(1, total);
        let half = checked_div(rounded_up, 2);
        checked_add_opt(total, half)
    }

    /// Worst-case UTF-8 output length for `byte_length` input bytes: three
    /// times the input length, where the input length includes one extra
    /// byte when `self.lead` holds a byte.
    /// NOTE(review): presumably `None` on overflow -- confirm against the
    /// `checked_mul` helper in the parent module.
    pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
        let total = self.plus_one_if_lead(byte_length);
        checked_mul(3, total)
    }

    // Invokes `ascii_compatible_two_byte_decoder_functions!` with two code
    // blocks and the identifiers those blocks refer to.
    // NOTE(review): the macro is defined elsewhere; presumably it expands to
    // the decode methods, running the first block on a non-ASCII lead byte
    // (`non_ascii`) and the second block on the byte that follows it (`byte`),
    // with `lead_minus_offset` holding the first block's value -- confirm
    // against the macro definition.
    ascii_compatible_two_byte_decoder_functions!({
    // If lead is between 0x81 and 0xFE, inclusive,
    // subtract offset 0x81.
    // The wrapping subtraction lets the single unsigned comparison below
    // reject values on either side of that range; those are reported as a
    // one-byte malformed sequence.
                                                     let non_ascii_minus_offset =
                                                         non_ascii.wrapping_sub(0x81);
                                                     if non_ascii_minus_offset > (0xFE - 0x81) {
                                                         return (DecoderResult::Malformed(1, 0),
                                                                 source.consumed(),
                                                                 handle.written());
                                                     }
                                                     non_ascii_minus_offset
                                                 },
                                                 {
                                                     if lead_minus_offset >= 0x20 {
    // Not the extension range above KS X 1001
                                                         let trail_minus_offset =
                                                             byte.wrapping_sub(0xA1);
    // Trail bytes 0xA1..=0xFE pass this check; anything lower wraps around
    // to a value above the bound.
                                                         if trail_minus_offset <= (0xFE - 0xA1) {
    // KS X 1001
    // NOTE(review): `mul_94` presumably multiplies by 94, giving a row-major
    // pointer with 94 trail positions per lead -- confirm.
                                                             let ksx_pointer = mul_94(lead_minus_offset - 0x20) + trail_minus_offset as usize;
    // Each table lookup below subtracts the table's starting row with
    // `wrapping_sub`: pointers before the start wrap to huge values, so a
    // single `<` against the table length checks both bounds.
                                                             let hangul_pointer = ksx_pointer.wrapping_sub((0x2F - 0x20) * 94);
                                                             if hangul_pointer < KSX1001_HANGUL.len() {
                                                                 let upper_bmp = KSX1001_HANGUL[hangul_pointer];
                                                                 handle.write_upper_bmp(upper_bmp)
                                                             } else if ksx_pointer < KSX1001_SYMBOLS.len() {
                                                                 let bmp = KSX1001_SYMBOLS[ksx_pointer];
                                                                 handle.write_bmp_excl_ascii(bmp)
                                                             } else {
                                                                 let hanja_pointer = ksx_pointer.wrapping_sub((0x49 - 0x20) * 94);
                                                                 if hanja_pointer < KSX1001_HANJA.len() {
                                                                     let upper_bmp = KSX1001_HANJA[hanja_pointer];
                                                                     handle.write_upper_bmp(upper_bmp)
                                                                 } else if (lead_minus_offset == 0x27) && ((trail_minus_offset as usize) < KSX1001_UPPERCASE.len()) {
                                                                     let mid_bmp = KSX1001_UPPERCASE[trail_minus_offset as usize];
    // A zero entry in the uppercase table is treated as unassigned.
                                                                     if mid_bmp == 0 {
                                                                         return (DecoderResult::Malformed(2, 0),
                                                                                 unread_handle_trail.consumed(),
                                                                                 handle.written());
                                                                     }
                                                                     handle.write_mid_bmp(mid_bmp)
                                                                 } else if (lead_minus_offset == 0x28) && ((trail_minus_offset as usize) < KSX1001_LOWERCASE.len()) {
                                                                     let mid_bmp = KSX1001_LOWERCASE[trail_minus_offset as usize];
                                                                     handle.write_mid_bmp(mid_bmp)
                                                                 } else if (lead_minus_offset == 0x25) && ((trail_minus_offset as usize) < KSX1001_BOX.len()) {
                                                                     let upper_bmp = KSX1001_BOX[trail_minus_offset as usize];
                                                                     handle.write_upper_bmp(upper_bmp)
                                                                 } else {
    // Whatever is left goes through `ksx1001_other_decode`, for pointers
    // from row 2 onwards and fewer than 0x039F positions past its start.
                                                                     let other_pointer = ksx_pointer.wrapping_sub(2 * 94);
                                                                     if other_pointer < 0x039F {
                                                                         let bmp = ksx1001_other_decode(other_pointer as u16);
    // ASCII range means unassigned
                                                                         if bmp < 0x80 {
                                                                             return (DecoderResult::Malformed(2, 0),
                                                                                     unread_handle_trail.consumed(),
                                                                                     handle.written());
                                                                         }
                                                                         handle.write_bmp_excl_ascii(bmp)
                                                                     } else {
                                                                         return (DecoderResult::Malformed(2, 0),
                                                                                 unread_handle_trail.consumed(),
                                                                                 handle.written());
                                                                     }
                                                                 }
                                                             }
                                                         } else {
    // Extension range to the left of
    // KS X 1001
                                                             let left_lead = lead_minus_offset - 0x20;
    // Map the trail byte to a dense index (tested in reverse order below):
    // 0x41..=0x5A -> 0..=25, 0x61..=0x7A -> 26..=51, 0x81..=0xA0 -> 52..=83.
                                                             let left_trail = if byte.wrapping_sub(0x40 + 0x41) < (0x60 - 0x40) {
                                                                 byte - (12 + 0x41)
                                                             } else if byte.wrapping_sub(0x20 + 0x41) < (0x3A - 0x20) {
                                                                 byte - (6 + 0x41)
                                                             } else if byte.wrapping_sub(0x41) < 0x1A {
                                                                 byte - 0x41
                                                             } else {
    // A trail below 0x80 is not swallowed by the error: only one byte is
    // reported malformed and the trail handle is un-read.
    // NOTE(review): presumably so the byte is decoded again on its own --
    // confirm the semantics of `unread()`.
                                                                 if byte < 0x80 {
                                                                     return (DecoderResult::Malformed(1, 0),
                                                                             unread_handle_trail.unread(),
                                                                             handle.written());
                                                                 }
                                                                 return (DecoderResult::Malformed(2, 0),
                                                                         unread_handle_trail.consumed(),
                                                                         handle.written());
                                                             };
    // 190 - 94 - 12 = 84 dense trail indices per lead in this range.
                                                             let left_pointer = ((left_lead as usize) * (190 - 94 - 12)) + left_trail as usize;
                                                             if left_pointer < (0x45 - 0x20) * (190 - 94 - 12) + 0x12 {
                                                                 let upper_bmp = cp949_left_hangul_decode(left_pointer as u16);
                                                                 handle.write_upper_bmp(upper_bmp)
                                                             } else {
    // Pointer past the end of the left range: same error split as above.
                                                                 if byte < 0x80 {
                                                                     return (DecoderResult::Malformed(1, 0),
                                                                             unread_handle_trail.unread(),
                                                                             handle.written());
                                                                 }
                                                                 return (DecoderResult::Malformed(2, 0),
                                                                         unread_handle_trail.consumed(),
                                                                         handle.written());
                                                             }
                                                         }
                                                     } else {
    // Extension range above KS X 1001
    // Dense trail index (tested in reverse order below):
    // 0x41..=0x5A -> 0..=25, 0x61..=0x7A -> 26..=51, 0x81..=0xFE -> 52..=177.
                                                         let top_trail = if byte.wrapping_sub(0x40 + 0x41) < (0xBE - 0x40) {
                                                             byte - (12 + 0x41)
                                                         } else if byte.wrapping_sub(0x20 + 0x41) < (0x3A - 0x20) {
                                                             byte - (6 + 0x41)
                                                         } else if byte.wrapping_sub(0x41) < 0x1A {
                                                             byte - 0x41
                                                         } else {
    // A trail below 0x80 is un-read and only one byte is reported
    // malformed; any other invalid trail is reported together with the lead
    // as two malformed bytes.
                                                             if byte < 0x80 {
                                                                 return (DecoderResult::Malformed(1, 0),
                                                                         unread_handle_trail.unread(),
                                                                         handle.written());
                                                             }
                                                             return (DecoderResult::Malformed(2, 0),
                                                                     unread_handle_trail.consumed(),
                                                                     handle.written());
                                                         };
    // 190 - 12 = 178 dense trail indices per lead in this range.
                                                         let top_pointer = ((lead_minus_offset as usize) * (190 - 12)) + top_trail as usize;
                                                         let upper_bmp = cp949_top_hangul_decode(top_pointer as u16);
                                                         handle.write_upper_bmp(upper_bmp)
                                                     }
                                                 },
                                                 self,
                                                 non_ascii,
                                                 byte,
                                                 lead_minus_offset,
                                                 unread_handle_trail,
                                                 source,
                                                 handle,
                                                 'outermost,
                                                 copy_ascii_from_check_space_bmp,
                                                 check_space_bmp,
                                                 true);
}

/// Looks up the two-byte sequence for a BMP code unit in the tables
/// consulted below.
///
/// Returns `Some((lead, trail))` with both values already offset into their
/// final byte values, or `None` when none of the lookups finds `bmp`.
///
/// The lookups run in a fixed order and the first hit wins:
///
/// 1. the first `0xAB - 0x60` entries of `KSX1001_SYMBOLS`, tried only for
///    `bmp` in 0x3000..=0x3015;
/// 2. `ksx1001_other_encode`;
/// 3. depending on the range `bmp` falls in, either `KSX1001_LOWERCASE`
///    followed by `KSX1001_UPPERCASE`, or `KSX1001_BOX`;
/// 4. `KSX1001_SYMBOLS` from index 3 onwards, tried only for a handful of
///    ranges.
fn ksx1001_encode_misc(bmp: u16) -> Option<(usize, usize)> {
    if in_inclusive_range16(bmp, 0x3000, 0x3015) {
        let leading_symbols = &KSX1001_SYMBOLS[..(0xAB - 0x60)];
        if let Some(index) = position(leading_symbols, bmp) {
            return Some((0xA1, 0xA1 + index));
        }
    }

    if let Some(other_pointer) = ksx1001_other_encode(bmp) {
        // 94 trail positions per lead byte.
        let pointer = other_pointer as usize;
        let row = pointer / 94;
        let column = pointer % 94;
        return Some((row + (0x81 + 0x22), column + 0xA1));
    }

    if in_range16(bmp, 0x00AA, 0x0168) {
        // Latin: lowercase table first, uppercase only if that misses.
        let latin = position(&KSX1001_LOWERCASE[..], bmp)
            .map(|index| (0x81 + 0x28, 0xA1 + index))
            .or_else(|| {
                position(&KSX1001_UPPERCASE[..], bmp).map(|index| (0x81 + 0x27, 0xA1 + index))
            });
        if latin.is_some() {
            return latin;
        }
    } else if in_range16(bmp, 0x2500, 0x254C) {
        let boxes = &KSX1001_BOX[..];
        if let Some(index) = position(boxes, bmp) {
            return Some((0x81 + 0x25, 0xA1 + index));
        }
    }

    let symbol_candidate = in_inclusive_range16(bmp, 0x2015, 0x266D) ||
                           in_inclusive_range16(bmp, 0x321C, 0x33D8) ||
                           in_inclusive_range16(bmp, 0xFF3C, 0xFFE5) ||
                           in_inclusive_range16(bmp, 0x00A1, 0x00F7) ||
                           in_inclusive_range16(bmp, 0x02C7, 0x02DD);
    if !symbol_candidate {
        return None;
    }

    // The search skips the first three symbol entries, so the first lead
    // (0xA1) only has 94 - 3 positions left; later hits go to lead 0xA2.
    position(&KSX1001_SYMBOLS[3..], bmp).map(|index| {
        let first_row_len: usize = 94 - 3;
        if index < first_row_len {
            (0xA1, index + 0xA1 + 3)
        } else {
            (0xA2, index - first_row_len + 0xA1)
        }
    })
}

/// Encoder for EUC-KR output.
///
/// This is a unit struct: it has no fields and therefore carries no state
/// between calls. Instances are created through `EucKrEncoder::new`.
pub struct EucKrEncoder;

impl EucKrEncoder {
    /// Builds an `Encoder` for `encoding` backed by the
    /// `VariantEncoder::EucKr` variant.
    pub fn new(encoding: &'static Encoding) -> Encoder {
        let variant = VariantEncoder::EucKr(EucKrEncoder);
        Encoder::new(encoding, variant)
    }

    /// Worst-case output length, in bytes, for `u16_length` UTF-16 code
    /// units when unmappable characters are not replaced.
    ///
    /// Returns `None` if the multiplication overflows `usize`.
    pub fn max_buffer_length_from_utf16_without_replacement(&self,
                                                            u16_length: usize)
                                                            -> Option<usize> {
        // Budget two output bytes for every UTF-16 code unit.
        let bytes_per_code_unit = 2;
        u16_length.checked_mul(bytes_per_code_unit)
    }

    /// Worst-case output length, in bytes, for `byte_length` bytes of UTF-8
    /// input when unmappable characters are not replaced.
    ///
    /// The estimate equals the input length, so this never returns `None`.
    pub fn max_buffer_length_from_utf8_without_replacement(&self,
                                                           byte_length: usize)
                                                           -> Option<usize> {
        Some(byte_length)
    }

    // Invokes `ascii_compatible_bmp_encoder_functions!` with one code block
    // and the identifiers that block refers to.
    // NOTE(review): the macro is defined elsewhere; presumably it expands to
    // the encode methods and runs the block for each non-ASCII BMP code unit
    // `bmp` -- confirm against the macro definition.
    // The block picks a (lead, trail) pair for `bmp`, or returns early with
    // an unmappable result, and finally writes the pair through `handle`.
    ascii_compatible_bmp_encoder_functions!({
                                                // Wrapping subtraction: one unsigned comparison
                                                // tests for 0xAC00 <= bmp < 0xD7A4.
                                                let bmp_minus_hangul_start =
                                                    bmp.wrapping_sub(0xAC00);
                                                let (lead, trail) = if bmp_minus_hangul_start <
                                                                       (0xD7A4 - 0xAC00) {
                                                    // Hangul
                                                    match KSX1001_HANGUL.binary_search(&bmp) {
                                                        Ok(ksx_hangul_pointer) => {
                                                            // Found in the KS X 1001 Hangul
                                                            // table: 94 trail positions per
                                                            // lead, leads starting at
                                                            // 0x81 + 0x2F.
                                                            let ksx_hangul_lead =
                                                                (ksx_hangul_pointer / 94) +
                                                                (0x81 + 0x2F);
                                                            let ksx_hangul_trail =
                                                                (ksx_hangul_pointer % 94) + 0xA1;
                                                            (ksx_hangul_lead, ksx_hangul_trail)
                                                        }
                                                        Err(_) => {
                                                            // Not in that table: use one of the
                                                            // two extension ranges, selected by
                                                            // the code unit value.
                                                            let (lead, cp949_trail) = if bmp <
                                                                                         0xC8A5 {
                                                                // Above KS X 1001
                                                                let top_pointer = cp949_top_hangul_encode(bmp) as usize;
                                                                let top_lead = (top_pointer /
                                                                                (190 - 12)) +
                                                                               0x81;
                                                                let top_trail = top_pointer %
                                                                                (190 - 12);
                                                                (top_lead, top_trail)
                                                            } else {
                                                                // To the left of KS X 1001
                                                                let left_pointer = cp949_left_hangul_encode(bmp) as usize;
                                                                let left_lead = (left_pointer /
                                                                                 (190 - 94 - 12)) +
                                                                                (0x81 + 0x20);
                                                                let left_trail = left_pointer %
                                                                                 (190 - 94 - 12);
                                                                (left_lead, left_trail)
                                                            };
                                                            // Turn the dense trail index into a
                                                            // byte: indices 0..=25 start at 0x41,
                                                            // 26..=51 start at 0x61, and 52 and
                                                            // up start at 0x81.
                                                            let offset = if cp949_trail >=
                                                                            (0x40 - 12) {
                                                                0x41 + 12
                                                            } else if cp949_trail >=
                                                                            (0x20 - 6) {
                                                                0x41 + 6
                                                            } else {
                                                                0x41
                                                            };
                                                            (lead, cp949_trail + offset)
                                                        }
                                                    }
                                                } else if in_range16(bmp,
                                                                                  0x33DE,
                                                                                  0xFF01) {
                                                    // Vast range that includes no other
                                                    // mappables except Hangul (already
                                                    // processed) and Hanja.
                                                    // Narrow the range further to Unified and
                                                    // Compatibility ranges of Hanja.
                                                    if in_range16(bmp, 0x4E00, 0x9F9D) ||
                                                       in_range16(bmp, 0xF900, 0xFA0C) {
                                                        if let Some(hanja_pointer) =
                                                               position(&KSX1001_HANJA[..], bmp) {
                                                            // 94 trail positions per lead, leads
                                                            // starting at 0x81 + 0x49.
                                                            let hanja_lead = (hanja_pointer / 94) +
                                                                             (0x81 + 0x49);
                                                            let hanja_trail = (hanja_pointer % 94) +
                                                                              0xA1;
                                                            (hanja_lead, hanja_trail)
                                                        } else {
                                                            return (EncoderResult::unmappable_from_bmp(bmp),
                                                                    source.consumed(),
                                                                    handle.written());
                                                        }
                                                    } else {
                                                        return (EncoderResult::unmappable_from_bmp(bmp),
                                                                source.consumed(),
                                                                handle.written());
                                                    }
                                                } else if let Some((lead, trail)) =
                                                                           ksx1001_encode_misc(bmp) {
                                                    // Everything else goes through the
                                                    // miscellaneous lookup.
                                                    (lead, trail)
                                                } else {
                                                    return (EncoderResult::unmappable_from_bmp(bmp),
                                                            source.consumed(),
                                                            handle.written());
                                                };
                                                // Both values were computed as `usize`; they
                                                // are narrowed to bytes for output.
                                                handle.write_two(lead as u8, trail as u8)
                                            },
                                            bmp,
                                            self,
                                            source,
                                            handle,
                                            copy_ascii_to_check_space_two,
                                            check_space_two,
                                            true);
}

// Any copyright to the test code below this comment is dedicated to the
// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/

#[cfg(test)]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Runs the shared `decode` test helper with the EUC-KR encoding.
    fn decode_euc_kr(bytes: &[u8], expect: &str) {
        decode(EUC_KR, bytes, expect);
    }

    /// Runs the shared `encode` test helper with the EUC-KR encoding.
    fn encode_euc_kr(string: &str, expect: &[u8]) {
        encode(EUC_KR, string, expect);
    }

    /// Spot checks for decoding: each entry is (input bytes, expected text).
    #[test]
    fn test_euc_kr_decode() {
        let cases: [(&[u8], &str); 10] = [
            // Empty
            (b"", ""),
            // ASCII
            (b"\x61\x62", "\u{0061}\u{0062}"),
            (b"\x81\x41", "\u{AC02}"),
            (b"\x81\x5B", "\u{FFFD}\x5B"),
            (b"\xFD\xFE", "\u{8A70}"),
            (b"\xFE\x41", "\u{FFFD}\x41"),
            (b"\xFF\x41", "\u{FFFD}\x41"),
            (b"\x80\x41", "\u{FFFD}\x41"),
            (b"\xA1\xFF", "\u{FFFD}"),
            (b"\x81\xFF", "\u{FFFD}"),
        ];
        for &(bytes, expect) in cases.iter() {
            decode_euc_kr(bytes, expect);
        }
    }

    /// Spot checks for encoding: each entry is (input text, expected bytes).
    #[test]
    fn test_euc_kr_encode() {
        let cases: [(&str, &[u8]); 4] = [
            // Empty
            ("", b""),
            // ASCII
            ("\u{0061}\u{0062}", b"\x61\x62"),
            ("\u{AC02}", b"\x81\x41"),
            ("\u{8A70}", b"\xFD\xFE"),
        ];
        for &(string, expect) in cases.iter() {
            encode_euc_kr(string, expect);
        }
    }

    /// Decodes the bundled input file and compares the result with the
    /// bundled reference text; the input is expected to trigger errors.
    #[test]
    fn test_euc_kr_decode_all() {
        let encoded = include_bytes!("test_data/euc_kr_in.txt");
        let reference = include_str!("test_data/euc_kr_in_ref.txt");
        let (decoded, saw_errors) = EUC_KR.decode_without_bom_handling(encoded);
        assert!(saw_errors, "Should have had errors.");
        assert_eq!(&decoded[..], reference);
    }

    /// Encodes the bundled text file and compares the result with the
    /// bundled reference bytes; no errors are expected.
    #[test]
    fn test_euc_kr_encode_all() {
        let text = include_str!("test_data/euc_kr_out.txt");
        let reference = include_bytes!("test_data/euc_kr_out_ref.txt");
        let (encoded, used_encoding, saw_errors) = EUC_KR.encode(text);
        assert!(!saw_errors, "Should not have had errors.");
        assert_eq!(used_encoding, EUC_KR);
        assert_eq!(&encoded[..], &reference[..]);
    }
}