qubit_text_io/adapters/
utf8_text_reader.rs1use std::io::{
11 self,
12 BufRead,
13 BufReader,
14 Read,
15};
16
17use crate::{
18 TextLineRead,
19 TextRead,
20};
21
/// Text reader adapter that decodes UTF-8 from a wrapped byte reader.
///
/// The adapter owns the wrapped reader `R` and keeps no additional state of
/// its own; all buffering is provided by `R`.
#[derive(Debug)]
pub struct Utf8TextReader<R> {
    /// Wrapped byte source.
    inner: R,
}

impl<R> Utf8TextReader<R>
where
    R: BufRead,
{
    /// Wraps an already-buffered reader.
    #[must_use]
    pub const fn new(inner: R) -> Self {
        Self { inner }
    }

    /// Returns a shared reference to the wrapped reader.
    #[must_use]
    pub const fn get_ref(&self) -> &R {
        &self.inner
    }

    /// Returns a mutable reference to the wrapped reader.
    ///
    /// Bytes consumed through this reference are consumed from the same
    /// underlying source this adapter reads from.
    pub fn get_mut(&mut self) -> &mut R {
        &mut self.inner
    }

    /// Consumes the adapter and hands back the wrapped reader.
    #[must_use]
    pub fn into_inner(self) -> R {
        self.inner
    }
}

impl<R> Utf8TextReader<BufReader<R>>
where
    R: Read,
{
    /// Wraps an unbuffered `reader` in a [`BufReader`] and builds the adapter
    /// on top of it.
    #[must_use]
    pub fn from_read(reader: R) -> Self {
        Self::new(BufReader::new(reader))
    }
}
89
90impl<R> TextRead for Utf8TextReader<R>
91where
92 R: BufRead,
93{
94 type Error = io::Error;
95
96 fn read_char(&mut self) -> Result<Option<char>, Self::Error> {
97 read_utf8_char(&mut self.inner)
98 }
99
100 fn read_chars(&mut self, output: &mut Vec<char>, max: usize) -> Result<usize, Self::Error> {
101 let mut count = 0;
102 while count < max {
103 match self.read_char()? {
104 Some(ch) => {
105 output.push(ch);
106 count += 1;
107 }
108 None => break,
109 }
110 }
111 Ok(count)
112 }
113
114 fn read_to_string(&mut self, output: &mut String) -> Result<usize, Self::Error> {
115 let start = output.len();
116 self.inner.read_to_string(output)?;
117 Ok(output[start..].chars().count())
118 }
119}
120
121impl<R> TextLineRead for Utf8TextReader<R>
122where
123 R: BufRead,
124{
125 fn read_line(&mut self, output: &mut String) -> Result<bool, Self::Error> {
126 Ok(self.inner.read_line(output)? != 0)
127 }
128}
129
130fn read_utf8_char<R>(reader: &mut R) -> io::Result<Option<char>>
142where
143 R: Read + ?Sized,
144{
145 let mut first = [0_u8; 1];
146 let read = reader.read(&mut first)?;
147 if read == 0 {
148 return Ok(None);
149 }
150 let width = utf8_char_width(first[0])?;
151 let mut buffer = [0_u8; 4];
152 buffer[0] = first[0];
153 reader.read_exact(&mut buffer[1..width])?;
154 let text = std::str::from_utf8(&buffer[..width]).map_err(invalid_utf8_error)?;
155 Ok(text.chars().next())
156}
157
/// Returns the total byte length of the UTF-8 sequence introduced by the
/// leading byte `byte`.
///
/// Accepted leading bytes are `0x00..=0x7F` (1 byte), `0xC2..=0xDF`
/// (2 bytes), `0xE0..=0xEF` (3 bytes) and `0xF0..=0xF4` (4 bytes).
///
/// # Errors
///
/// Any other value yields an [`io::ErrorKind::InvalidData`] error whose
/// message names the offending byte.
fn utf8_char_width(byte: u8) -> io::Result<usize> {
    let width = match byte {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("invalid UTF-8 leading byte: 0x{byte:02X}"),
            ));
        }
    };
    Ok(width)
}
181
/// Converts a UTF-8 validation failure into an [`io::Error`] of kind
/// [`io::ErrorKind::InvalidData`], embedding the original error's message.
fn invalid_utf8_error(error: std::str::Utf8Error) -> io::Error {
    let message = format!("invalid UTF-8 text: {error}");
    io::Error::new(io::ErrorKind::InvalidData, message)
}