utf16_ext/
read.rs

1use std::io::{Read, Error, ErrorKind};
2
3use byteorder::{ByteOrder, ReadBytesExt};
4
5/// Extension to the `Read` trait
6pub trait Utf16ReadExt: ReadBytesExt {
7    /// Transforms this instance into an `Iterator` over its u16-units (shorts).
8    ///
9    /// The returned type implements `Iterator` where the `Item` is `Result<u16, R::Err>`.
10    /// The yielded item is `Ok` if a short was successfully read and `Err` otherwise.
11    /// EOF is mapped to returning `None` from this iterator.
12    fn shorts<T: ByteOrder>(self) -> Shorts<T, Self>
13    where Self: Sized {
14        Shorts(PhantomData, self)
15    }
16    /// Transforms this instance into an `Iterator` over `char`s from utf-16.
17    ///
18    /// The returned type implements `Iterator` where the `Item` is `Result<char, R::Err>`.
19    fn utf16_chars<T: ByteOrder>(self) -> Chars<T, Self>
20    where Self: Sized {
21        Chars(PhantomData, self)
22    }
23    /// Reads all chars (from utf16) until a newline is reached (U+000A) and
24    /// appends them to the provided buffer.
25    fn read_utf16_line<T: ByteOrder>(&mut self, buf: &mut String) -> Result<usize, Error> {
26        let mut len = 0;
27        for c in self.utf16_chars::<T>() {
28            match c {
29                Ok(c) => {
30                    buf.push(c);
31                    len += 1;
32                    if c == '\n' {
33                        break
34                    }
35                }
36                Err(e) => match e.kind() {
37                    ErrorKind::Interrupted => continue,
38                    _ => return Err(e),
39                }
40            }
41        }
42        Ok(len)
43    }
44    /// Returns an iterator over the lines of this reader.
45    ///
46    /// Like the normal `BufRead::lines`, newlines characters aren't included
47    fn utf16_lines<T: ByteOrder>(self) -> Lines<T, Self>
48    where Self: Sized {
49        Lines(PhantomData, self)
50    }
51}
52
53impl<T: Read> Utf16ReadExt for T {}
54
55use std::marker::PhantomData;
56
57#[derive(Debug)]
58/// An iterator over `u16` values of a reader.
59pub struct Shorts<T: ByteOrder, R>(PhantomData<T>, R);
60#[derive(Debug)]
61/// An iterator over `char` values of a utf-16 reader.
62pub struct Chars<T: ByteOrder, R>(PhantomData<T>, R);
63
64impl<T: ByteOrder, R: Utf16ReadExt> Iterator for Shorts<T, R> {
65    type Item = Result<u16, Error>;
66    fn next(&mut self) -> Option<Self::Item> {
67        loop {
68            match self.1.read_u16::<T>() {
69                Ok(u) => break Some(Ok(u)),
70                Err(e) => match e.kind() {
71                    ErrorKind::Interrupted => (),
72                    ErrorKind::UnexpectedEof => break None,
73                    _ => break Some(Err(e)),
74                }
75            }
76        }
77    }
78}
79
80use std::char::decode_utf16;
81
82impl<T: ByteOrder, R: Utf16ReadExt> Iterator for Chars<T, R> {
83    type Item = Result<char, Error>;
84    fn next(&mut self) -> Option<Self::Item> {
85        let first = match self.1.read_u16::<T>() {
86            Ok(f) => f,
87            Err(ref e) if e.kind() == ErrorKind::UnexpectedEof => return None,
88            Err(e) => return Some(Err(e))
89        };
90        match decode_utf16(Some(first)).next().unwrap() {
91            Ok(c) => Some(Ok(c)),
92            Err(_) => {
93                let snd = match self.1.read_u16::<T>() {
94                    Ok(f) => f,
95                    Err(ref e) if e.kind() == ErrorKind::UnexpectedEof => return None,
96                    Err(e) => return Some(Err(e))
97                };
98                Some(decode_utf16(Some(first).into_iter().chain(Some(snd))).next().unwrap()
99                    .map_err(|e| Error::new(ErrorKind::InvalidData, e)))
100            }
101        }
102    }
103}
104
105#[derive(Debug)]
106/// An iterator over the lines of a reader (reading utf-16)
107pub struct Lines<T: ByteOrder, B>(PhantomData<T>, B);
108
109impl<T: ByteOrder, B: Utf16ReadExt> Iterator for Lines<T, B> {
110    type Item = Result<String, Error>;
111
112    fn next(&mut self) -> Option<Self::Item> {
113        let mut buf = String::new();
114        match self.1.read_utf16_line::<T>(&mut buf) {
115            Ok(0) => None,
116            Ok(_n) => {
117                if buf.ends_with("\n") {
118                    buf.pop();
119                    if buf.ends_with("\r") {
120                        buf.pop();
121                    }
122                }
123                Some(Ok(buf))
124            }
125            Err(e) => Some(Err(e))
126        }
127    }
128}