1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#[cfg(any(feature = "charsets", feature = "json"))]
use std::io::BufReader;
use std::io::{self, Read, Write};

use http::header::HeaderMap;
#[cfg(feature = "json")]
use serde::de::DeserializeOwned;

use crate::error::Result;
use crate::parsing::compressed_reader::CompressedReader;
use crate::request::PreparedRequest;

#[cfg(feature = "charsets")]
use {
    crate::{
        charsets::{self, Charset},
        parsing::buffers::trim_byte,
        parsing::TextReader,
    },
    encoding_rs::Encoding,
    http::header::CONTENT_TYPE,
};

#[cfg(feature = "charsets")]
fn get_charset(headers: &HeaderMap, default_charset: Option<Charset>) -> Charset {
    if let Some(value) = headers.get(CONTENT_TYPE) {
        let bytes = value.as_bytes();
        if let Some(scol) = bytes.iter().position(|&b| b == b';') {
            let rhs = trim_byte(b' ', &bytes[scol + 1..]);
            if rhs.starts_with(b"charset=") {
                if let Some(enc) = Encoding::for_label(&rhs[8..]) {
                    return enc;
                }
            }
        }
    }
    default_charset.unwrap_or(charsets::WINDOWS_1252)
}

/// The `ResponseReader` is used to read the body of a response.
///
/// The `ResponseReader` implements `Read` and can be used like any other stream,
/// but the data returned by `Read` are untouched bytes from the socket. This means
/// that if a string is expected back, it could be in a different encoding than the
/// expected one. In order to properly read text, use the `charsets` feature and the
/// `text` or `text_reader` methods.
///
/// In general it's best to avoid `Read`ing directly from this object. Instead use the
/// helper methods, they process the data stream properly.
#[derive(Debug)]
pub struct ResponseReader {
    inner: CompressedReader,
    #[cfg(feature = "charsets")]
    charset: Charset,
}

impl ResponseReader {
    #[cfg(feature = "charsets")]
    pub(crate) fn new<B>(
        headers: &HeaderMap,
        request: &PreparedRequest<B>,
        reader: CompressedReader,
    ) -> ResponseReader {
        ResponseReader {
            inner: reader,
            charset: get_charset(headers, request.base_settings.default_charset),
        }
    }

    #[cfg(not(feature = "charsets"))]
    pub(crate) fn new<B>(_: &HeaderMap, _: &PreparedRequest<B>, reader: CompressedReader) -> ResponseReader {
        ResponseReader { inner: reader }
    }

    /// Write the response to any object that implements `Write`.
    pub fn write_to<W>(mut self, mut writer: W) -> Result<u64>
    where
        W: Write,
    {
        let n = io::copy(&mut self.inner, &mut writer)?;
        Ok(n)
    }

    /// Read the response to a `Vec` of bytes.
    pub fn bytes(self) -> Result<Vec<u8>> {
        let mut buf = Vec::new();
        self.write_to(&mut buf)?;
        Ok(buf)
    }

    /// Read the response to a `String`.
    ///
    /// If the `charsets` feature is enabled, it will try to decode the response using
    /// the encoding in the headers. If there's no encoding specified in the headers,
    /// it will fall back to the default encoding, and if that's also not specified,
    /// it will fall back to the default of ISO-8859-1.
    ///
    /// If the `charsets` feature is disabled, this method is the same as calling
    /// `text_utf8`.
    ///
    /// Note that both conversions are lossy, i.e. they will not raise errors when
    /// invalid data is encountered but output replacement characters instead.
    #[cfg(not(feature = "charsets"))]
    pub fn text(self) -> Result<String> {
        self.text_utf8()
    }

    /// Read the response to a `String`.
    ///
    /// If the `charsets` feature is enabled, it will try to decode the response using
    /// the encoding in the headers. If there's no encoding specified in the headers,
    /// it will fall back to the default encoding, and if that's also not specified,
    /// it will fall back to the default of ISO-8859-1.
    ///
    /// If the `charsets` feature is disabled, this method is the same as calling
    /// `text_utf8`.
    ///
    /// Note that both conversions are lossy, i.e. they will not raise errors when
    /// invalid data is encountered but output replacement characters instead.
    #[cfg(feature = "charsets")]
    pub fn text(self) -> Result<String> {
        let charset = self.charset;
        self.text_with(charset)
    }

    /// Read the response to a `String`, decoding with the given `Charset`.
    ///
    /// This will ignore the encoding from the response headers and the default encoding, if any.
    ///
    /// This method only exists when the `charsets` feature is enabled.
    #[cfg(feature = "charsets")]
    pub fn text_with(self, charset: Charset) -> Result<String> {
        let mut reader = self.text_reader_with(charset);
        let mut text = String::new();
        reader.read_to_string(&mut text)?;
        Ok(text)
    }

    /// Create a `TextReader` from this `ResponseReader`.
    ///
    /// If the response headers contain charset information, that charset will be used to decode the body.
    /// Otherwise, if a default encoding is set it will be used. If there is no default encoding, ISO-8859-1
    /// will be used.
    ///
    /// This method only exists when the `charsets` feature is enabled.
    #[cfg(feature = "charsets")]
    pub fn text_reader(self) -> TextReader<BufReader<ResponseReader>> {
        let charset = self.charset;
        self.text_reader_with(charset)
    }

    /// Create a `TextReader` from this `ResponseReader`, decoding with the given `Charset`.
    ///
    /// This will ignore the encoding from the response headers and the default encoding, if any.
    ///
    /// This method only exists when the `charsets` feature is enabled.
    #[cfg(feature = "charsets")]
    pub fn text_reader_with(self, charset: Charset) -> TextReader<BufReader<ResponseReader>> {
        TextReader::new(BufReader::new(self), charset)
    }

    /// Read the response body to a String using the UTF-8 encoding.
    ///
    /// This method ignores headers and the default encoding.
    ///
    /// Note that this is lossy, i.e. it will not raise errors when
    /// invalid data is encountered but output replacement characters instead.
    pub fn text_utf8(mut self) -> Result<String> {
        let mut buf = Vec::new();
        self.inner.read_to_end(&mut buf)?;

        let text = String::from_utf8(buf).unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned());

        Ok(text)
    }

    /// Parse the response as a JSON object and return it.
    ///
    /// If the `charsets` feature is enabled, it will try to decode the response using
    /// the encoding in the headers. If there's no encoding specified in the headers,
    /// it will fall back to the default encoding, and if that's also not specified,
    /// it will fall back to the default of ISO-8859-1.
    ///
    /// If the `charsets` feature is disabled, this method is the same as calling
    /// `json_utf8`.
    #[cfg(feature = "json")]
    #[cfg(feature = "charsets")]
    pub fn json<T>(self) -> Result<T>
    where
        T: DeserializeOwned,
    {
        let reader = BufReader::new(self.text_reader());
        let obj = serde_json::from_reader(reader)?;
        Ok(obj)
    }

    /// Parse the response as a JSON object and return it.
    ///
    /// If the `charsets` feature is enabled, it will try to decode the response using
    /// the encoding in the headers. If there's no encoding specified in the headers,
    /// it will fall back to the default encoding, and if that's also not specified,
    /// it will fall back to the default of ISO-8859-1.
    ///
    /// If the `charsets` feature is disabled, this method is the same as calling
    /// `json_utf8`.
    #[cfg(feature = "json")]
    #[cfg(not(feature = "charsets"))]
    pub fn json<T>(self) -> Result<T>
    where
        T: DeserializeOwned,
    {
        self.json_utf8()
    }

    /// Parse the response as a JSON object encoded in UTF-8.
    ///
    /// This method ignores headers and the default encoding.
    ///
    /// This method only exists when the `json` feature is enabled.
    #[cfg(feature = "json")]
    pub fn json_utf8<T>(self) -> Result<T>
    where
        T: DeserializeOwned,
    {
        let reader = BufReader::new(self);
        let obj = serde_json::from_reader(reader)?;
        Ok(obj)
    }
}

impl Read for ResponseReader {
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.inner.read(buf)
    }
}

#[cfg(test)]
#[cfg(feature = "charsets")]
mod tests {
    use http::header::{HeaderMap, HeaderValue, CONTENT_TYPE};

    use super::get_charset;
    use crate::charsets;

    #[test]
    fn test_get_charset_from_header() {
        let mut headers = HeaderMap::new();
        headers.insert(
            CONTENT_TYPE,
            HeaderValue::from_bytes(&b"text/html; charset=UTF-8"[..]).unwrap(),
        );
        assert_eq!(get_charset(&headers, None), charsets::UTF_8);
    }

    #[test]
    fn test_get_charset_from_header_lowercase() {
        let mut headers = HeaderMap::new();
        headers.insert(
            CONTENT_TYPE,
            HeaderValue::from_bytes(&b"text/html; charset=utf8"[..]).unwrap(),
        );
        assert_eq!(get_charset(&headers, None), charsets::UTF_8);
    }

    #[test]
    fn test_get_charset_from_default() {
        let headers = HeaderMap::new();
        assert_eq!(get_charset(&headers, Some(charsets::UTF_8)), charsets::UTF_8);
    }

    #[test]
    fn test_get_charset_standard() {
        let headers = HeaderMap::new();
        assert_eq!(get_charset(&headers, None), charsets::WINDOWS_1252);
    }
}