domain_core/master/
source.rs1use std::{char, io};
6use std::io::Read;
7use std::fs::File;
8use std::path::Path;
9use failure::Fail;
10use super::scan::CharSource;
11
12
13impl<'a> CharSource for &'a str {
16 fn next(&mut self) -> Result<Option<char>, io::Error> {
17 let res = match self.chars().next() {
18 Some(ch) => ch,
19 None => return Ok(None),
20 };
21 *self = &self[res.len_utf8()..];
22 Ok(Some(res))
23 }
24}
25
26
27pub struct AsciiFile {
34 file: File,
35 buf: Option<(Box<[u8]>, usize, usize)>,
36}
37
38impl AsciiFile {
39 pub fn new(file: File) -> Self {
40 AsciiFile {
41 file,
42 buf: unsafe {
43 let mut buffer = Vec::with_capacity(CAP);
44 buffer.set_len(CAP);
45 Some((buffer.into_boxed_slice(), 0, 0))
46 }
47 }
48 }
49
50 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
52 File::open(path).map(Self::new)
53 }
54}
55
56impl CharSource for AsciiFile {
57 fn next(&mut self) -> Result<Option<char>, io::Error> {
58 let err = if let Some((ref mut buf, ref mut len, ref mut pos))
59 = self.buf {
60 if *pos < *len {
61 let res = buf[*pos];
62 if res.is_ascii() {
63 *pos += 1;
64 return Ok(Some(res as char))
65 }
66 Err(io::Error::new(
67 io::ErrorKind::InvalidData, AsciiError(res).compat()
68 ))
69 }
70 else {
71 match self.file.read(buf) {
72 Ok(0) => Ok(None),
73 Ok(read_len) => {
74 *len = read_len;
75 let res = buf[0];
76 if res.is_ascii() {
77 *pos = 1;
78 return Ok(Some(res as char))
79 }
80 Err(io::Error::new(
81 io::ErrorKind::InvalidData,
82 AsciiError(res).compat()
83 ))
84 }
85 Err(err) => Err(err)
86 }
87 }
88 }
89 else {
90 return Ok(None);
91 };
92 self.buf = None;
93 err
94 }
95}
96
97
98#[derive(Clone, Copy, Debug, Eq, Fail, PartialEq)]
102#[fail(display="invalid ASCII character '{}'", _0)]
103pub struct AsciiError(u8);
104
105
106pub struct Utf8File(OctetFile);
110
111impl Utf8File {
112 pub fn new(file: File) -> Self {
113 Utf8File(OctetFile::new(file))
114 }
115
116 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
118 File::open(path).map(Self::new)
119 }
120}
121
122impl CharSource for Utf8File {
123 fn next(&mut self) -> Result<Option<char>, io::Error> {
124 let first = match self.0.next()? {
125 Some(ch) => ch,
126 None => return Ok(None)
127 };
128 if first.is_ascii() { return Ok(Some(first as char))
130 }
131 let second = match self.0.next()? {
132 Some(ch) => ch,
133 None => {
134 return Err(io::Error::new(
135 io::ErrorKind::UnexpectedEof, "unexpected EOF"
136 ))
137 }
138 };
139 if first < 0xC0 || second < 0x80 {
140 return Err(Utf8Error.into())
141 }
142 if first < 0xE0 {
143 return Ok(Some(unsafe {
144 char::from_u32_unchecked(
145 (u32::from(first & 0x1F)) << 6 |
146 u32::from(second & 0x3F)
147 )
148 }))
149 }
150 let third = match self.0.next()? {
151 Some(ch) => ch,
152 None => {
153 return Err(io::Error::new(
154 io::ErrorKind::UnexpectedEof, "unexpected EOF"
155 ))
156 }
157 };
158 if third < 0x80 {
159 return Err(Utf8Error.into())
160 }
161 if first < 0xF0 {
162 return Ok(Some(unsafe {
163 char::from_u32_unchecked(
164 (u32::from(first & 0x0F)) << 12 |
165 (u32::from(second & 0x3F)) << 6 |
166 u32::from(third & 0x3F)
167 )
168 }))
169 }
170 let fourth = match self.0.next()? {
171 Some(ch) => ch,
172 None => {
173 return Err(io::Error::new(
174 io::ErrorKind::UnexpectedEof, "unexpected EOF"
175 ))
176 }
177 };
178 if first > 0xF7 || fourth < 0x80 {
179 return Err(Utf8Error.into())
180 }
181 Ok(Some(unsafe {
182 char::from_u32_unchecked(
183 (u32::from(first & 0x07)) << 18 |
184 (u32::from(second & 0x3F)) << 12 |
185 (u32::from(third & 0x3F)) << 6 |
186 u32::from(fourth & 0x3F)
187 )
188 }))
189 }
190}
191
192
193#[derive(Clone, Copy, Debug, Eq, Fail, PartialEq)]
197#[fail(display="invalid UTF-8 sequence")]
198pub struct Utf8Error;
199
200impl From<Utf8Error> for io::Error {
201 fn from(err: Utf8Error) -> Self {
202 io::Error::new(io::ErrorKind::Other, err.compat())
203 }
204}
205
206
/// A buffered source of raw octets read from a file.
///
/// Octets are pulled from the file in chunks of `CAP` octets and handed out
/// one at a time, without any interpretation of their value.
pub struct OctetFile {
    /// The file octets are read from.
    file: File,

    /// The read buffer as `(storage, filled length, read position)`.
    ///
    /// Set to `None` once the end of the file has been reached or a read
    /// has failed, which marks the source as finished.
    buf: Option<(Box<[u8]>, usize, usize)>,
}

/// The size of the read buffer in octets.
const CAP: usize = 8 * 1024;

impl OctetFile {
    /// Creates a new octet source reading from an already opened file.
    ///
    /// Nothing is read from the file until the first octet is requested.
    pub fn new(file: File) -> Self {
        OctetFile {
            file,
            // The buffer is zero-initialized on purpose: handing
            // uninitialized memory to `Read::read` is undefined behaviour,
            // and zeroing `CAP` octets once is negligible.
            buf: Some((vec![0; CAP].into_boxed_slice(), 0, 0)),
        }
    }

    /// Opens the file at `path` for reading and wraps it in an octet source.
    ///
    /// # Errors
    ///
    /// Returns the error from `File::open` if the file cannot be opened.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
        File::open(path).map(Self::new)
    }

    /// Returns the next octet of the file.
    ///
    /// Returns `Ok(None)` at end of file and the underlying I/O error if a
    /// read fails. After end of file or an error the buffer is dropped, so
    /// every later call returns `Ok(None)`.
    ///
    /// Octets are passed through unchanged whatever their value; deciding
    /// whether they form valid text is left to the caller.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>, io::Error> {
        let outcome = if let Some((ref mut buf, ref mut len, ref mut pos))
                            = self.buf {
            if *pos < *len {
                let res = buf[*pos];
                *pos += 1;
                return Ok(Some(res))
            }
            // Buffer used up: refill it from the file.
            match self.file.read(buf) {
                Ok(0) => Ok(None),
                Ok(read_len) => {
                    *len = read_len;
                    // The first octet is handed out right away.
                    *pos = 1;
                    return Ok(Some(buf[0]))
                }
                Err(err) => Err(err)
            }
        }
        else {
            return Ok(None);
        };
        // EOF or failure: release the buffer so the source stays finished.
        self.buf = None;
        outcome
    }
}