ureq/body/mod.rs
1use std::fmt;
2use std::io;
3use std::sync::Arc;
4
5pub use build::BodyBuilder;
6use ureq_proto::BodyMode;
7use ureq_proto::http::header;
8
9use crate::Error;
10use crate::http;
11use crate::run::BodyHandler;
12
13use self::limit::LimitReader;
14use self::lossy::LossyUtf8Reader;
15
16mod build;
17mod limit;
18mod lossy;
19
20#[cfg(feature = "charset")]
21mod charset;
22
23#[cfg(feature = "gzip")]
24mod gzip;
25
26#[cfg(feature = "brotli")]
27mod brotli;
28
/// Default max body size (10 MiB) applied by `Body::read_to_string()`,
/// `Body::read_to_vec()` and `Body::read_json()`.
const MAX_BODY_SIZE: u64 = 10 * 1024 * 1024;
31
32/// A response body returned as [`http::Response<Body>`].
33///
34/// # Default size limit
35///
36/// Methods like `read_to_string()`, `read_to_vec()`, and `read_json()` have a **default 10MB limit**
37/// to prevent memory exhaustion. To download larger files, use `with_config().limit(new_size)`:
38///
39/// ```
40/// // Download a 20MB file
41/// let bytes = ureq::get("http://httpbin.org/bytes/200000000")
42/// .call()?
43/// .body_mut()
44/// .with_config()
45/// .limit(20 * 1024 * 1024) // 20MB
46/// .read_to_vec()?;
47/// # Ok::<_, ureq::Error>(())
48/// ```
49///
50/// # Body lengths
51///
/// HTTP/1.1 has two major modes of transferring body data. Either a `Content-Length`
53/// header defines exactly how many bytes to transfer, or `Transfer-Encoding: chunked`
54/// facilitates a streaming style when the size is not known up front.
55///
56/// To protect against a problem called [request smuggling], ureq has heuristics for
57/// how to interpret a server sending both `Transfer-Encoding` and `Content-Length` headers.
58///
/// 1. `chunked` takes precedence if both headers are present (not for HTTP/1.0)
60/// 2. `content-length` is used if there is no chunked
61/// 3. If there are no headers, fall back on "close delimited" meaning the socket
62/// must close to end the body
63///
64/// When a `Content-Length` header is used, ureq will ensure the received body is _EXACTLY_
65/// as many bytes as declared (it cannot be less). This mechanic is in `ureq-proto`
66/// and is different to the [`BodyWithConfig::limit()`] below.
67///
68/// # Pool reuse
69///
70/// To return a connection (aka [`Transport`][crate::unversioned::transport::Transport])
/// to the Agent's pool, the body must be read to end. If [`BodyWithConfig::limit()`] is set
/// to a shorter size than the actual response body, the connection will not be reused.
73///
74/// # Example
75///
76/// ```
77/// use std::io::Read;
78/// let mut res = ureq::get("http://httpbin.org/bytes/100")
79/// .call()?;
80///
81/// assert!(res.headers().contains_key("Content-Length"));
82/// let len: usize = res.headers().get("Content-Length")
83/// .unwrap().to_str().unwrap().parse().unwrap();
84///
85/// let mut bytes: Vec<u8> = Vec::with_capacity(len);
86/// res.body_mut().as_reader()
87/// .read_to_end(&mut bytes)?;
88///
89/// assert_eq!(bytes.len(), len);
90/// # Ok::<_, ureq::Error>(())
91/// ```
92///
93/// [request smuggling]: https://en.wikipedia.org/wiki/HTTP_request_smuggling
pub struct Body {
    // Where the body bytes are read from.
    source: BodyDataSource,
    // Response metadata; shared (via `Arc`) with every `BodyWithConfig` created from this body.
    info: Arc<ResponseInfo>,
}
98
/// The origin of the bytes of a `Body`.
enum BodyDataSource {
    /// Bytes come from a boxed `BodyHandler`; this is the variant `Body::new()` creates.
    Handler(Box<BodyHandler>),
    /// Bytes come from an arbitrary boxed reader.
    Reader(Box<dyn io::Read + Send + Sync>),
}
103
/// Metadata describing a response body, derived from the response headers and body mode.
#[derive(Clone)]
pub(crate) struct ResponseInfo {
    // Parsed value of the `Content-Encoding` header.
    content_encoding: ContentEncoding,
    // Mime type part of the `Content-Type` header, if any.
    mime_type: Option<String>,
    // `charset=` parameter of the `Content-Type` header, if any.
    charset: Option<String>,
    // How the incoming body is delimited (length, chunked, close or no body).
    body_mode: BodyMode,
}
111
112impl Body {
113 /// Builder for creating a body
114 ///
115 /// This is useful for testing, or for [`Middleware`][crate::middleware::Middleware] that
116 /// returns another body than the requested one.
    pub fn builder() -> BodyBuilder {
        // Public entry point; simply hands out a fresh `BodyBuilder`.
        BodyBuilder::new()
    }
120
121 pub(crate) fn new(handler: BodyHandler, info: ResponseInfo) -> Self {
122 Body {
123 source: BodyDataSource::Handler(Box::new(handler)),
124 info: Arc::new(info),
125 }
126 }
127
128 /// The mime-type of the `content-type` header.
129 ///
130 /// For the below header, we would get `Some("text/plain")`:
131 ///
132 /// ```text
133 /// Content-Type: text/plain; charset=iso-8859-1
134 /// ```
135 ///
136 /// *Caution:* A bad server might set `Content-Type` to one thing and send
137 /// something else. There is no way ureq can verify this.
138 ///
139 /// # Example
140 ///
141 /// ```
142 /// let res = ureq::get("https://www.google.com/")
143 /// .call()?;
144 ///
145 /// assert_eq!(res.body().mime_type(), Some("text/html"));
146 /// # Ok::<_, ureq::Error>(())
147 /// ```
    pub fn mime_type(&self) -> Option<&str> {
        // Borrow the stored mime type as `&str` without cloning.
        self.info.mime_type.as_deref()
    }
151
152 /// The charset of the `content-type` header.
153 ///
154 /// For the below header, we would get `Some("iso-8859-1")`:
155 ///
156 /// ```text
157 /// Content-Type: text/plain; charset=iso-8859-1
158 /// ```
159 ///
160 /// *Caution:* A bad server might set `Content-Type` to one thing and send
161 /// something else. There is no way ureq can verify this.
162 ///
163 /// # Example
164 ///
165 /// ```
166 /// let res = ureq::get("https://www.google.com/")
167 /// .call()?;
168 ///
169 /// assert_eq!(res.body().charset(), Some("ISO-8859-1"));
170 /// # Ok::<_, ureq::Error>(())
171 /// ```
    pub fn charset(&self) -> Option<&str> {
        // Borrow the stored charset as `&str` without cloning.
        self.info.charset.as_deref()
    }
175
176 /// The content length of the body.
177 ///
178 /// This is the value of the `Content-Length` header, if there is one. For chunked
179 /// responses (`Transfer-Encoding: chunked`) , this will be `None`. Similarly for
180 /// HTTP/1.0 without a `Content-Length` header, the response is close delimited,
181 /// which means the length is unknown.
182 ///
183 /// A bad server might set `Content-Length` to one thing and send something else.
184 /// ureq will double check this, see section on body length heuristics.
185 ///
186 /// # Example
187 ///
188 /// ```
189 /// let res = ureq::get("https://httpbin.org/bytes/100")
190 /// .call()?;
191 ///
192 /// assert_eq!(res.body().content_length(), Some(100));
193 /// # Ok::<_, ureq::Error>(())
194 /// ```
195 pub fn content_length(&self) -> Option<u64> {
196 // After transparent decompression, the original Content-Length no longer
197 // reflects the actual body size, so we return None.
198 if self.info.is_decompressing() {
199 return None;
200 }
201 match self.info.body_mode {
202 BodyMode::NoBody => None,
203 BodyMode::LengthDelimited(v) => Some(v),
204 BodyMode::Chunked => None,
205 BodyMode::CloseDelimited => None,
206 }
207 }
208
209 /// Handle this body as a shared `impl Read` of the body.
210 ///
211 /// This is the regular API which goes via [`http::Response::body_mut()`] to get a
212 /// mut reference to the `Body`, and then use `as_reader()`. It is also possible to
213 /// get a non-shared, owned reader via [`Body::into_reader()`].
214 ///
    /// * Reader is not limited by default. That means a malicious server could
    ///   exhaust all available memory on your client machine.
    ///   To set a limit use [`Body::with_config()`].
218 /// * Reader will error if `Content-Length` is set, but the connection is closed
219 /// before all bytes are received.
220 ///
221 /// # Example
222 ///
223 /// ```
224 /// use std::io::Read;
225 ///
226 /// let mut res = ureq::get("http://httpbin.org/bytes/100")
227 /// .call()?;
228 ///
229 /// let mut bytes: Vec<u8> = Vec::with_capacity(1000);
230 /// res.body_mut().as_reader()
231 /// .read_to_end(&mut bytes)?;
232 /// # Ok::<_, ureq::Error>(())
233 /// ```
    pub fn as_reader(&mut self) -> BodyReader {
        // Default configuration: no size limit and no lossy utf-8 replacement.
        self.with_config().reader()
    }
237
238 /// Turn this response into an owned `impl Read` of the body.
239 ///
240 /// Sometimes it might be useful to disconnect the body reader from the body.
241 /// The reader returned by [`Body::as_reader()`] borrows the body, while this
242 /// variant consumes the body and turns it into a reader with lifetime `'static`.
243 /// The reader can for instance be sent to another thread.
244 ///
    /// * Reader is not limited by default. That means a malicious server could
    ///   exhaust all available memory on your client machine.
247 /// To set a limit use [`Body::into_with_config()`].
248 /// * Reader will error if `Content-Length` is set, but the connection is closed
249 /// before all bytes are received.
250 ///
251 /// ```
252 /// use std::io::Read;
253 ///
254 /// let res = ureq::get("http://httpbin.org/bytes/100")
255 /// .call()?;
256 ///
257 /// let (_, body) = res.into_parts();
258 ///
259 /// let mut bytes: Vec<u8> = Vec::with_capacity(1000);
260 /// body.into_reader()
261 /// .read_to_end(&mut bytes)?;
262 /// # Ok::<_, ureq::Error>(())
263 /// ```
    pub fn into_reader(self) -> BodyReader<'static> {
        // Default configuration: no size limit and no lossy utf-8 replacement.
        self.into_with_config().reader()
    }
267
268 /// Read the response as a string.
269 ///
270 /// * Response is limited to 10MB
    /// * Replaces incorrect utf-8 chars with `?`
272 ///
273 /// To change these defaults use [`Body::with_config()`].
274 ///
275 /// ```
276 /// let mut res = ureq::get("http://httpbin.org/robots.txt")
277 /// .call()?;
278 ///
279 /// let s = res.body_mut().read_to_string()?;
280 /// assert_eq!(s, "User-agent: *\nDisallow: /deny\n");
281 /// # Ok::<_, ureq::Error>(())
282 /// ```
283 ///
284 /// For larger text files, you must explicitly increase the limit:
285 ///
286 /// ```
287 /// // Read a large text file (25MB)
288 /// let text = ureq::get("http://httpbin.org/get")
289 /// .call()?
290 /// .body_mut()
291 /// .with_config()
292 /// .limit(25 * 1024 * 1024) // 25MB
293 /// .read_to_string()?;
294 /// # Ok::<_, ureq::Error>(())
295 /// ```
296 pub fn read_to_string(&mut self) -> Result<String, Error> {
297 self.with_config()
298 .limit(MAX_BODY_SIZE)
299 .lossy_utf8(true)
300 .read_to_string()
301 }
302
303 /// Read the response to a vec.
304 ///
305 /// * Response is limited to 10MB.
306 ///
307 /// To change this default use [`Body::with_config()`].
308 /// ```
309 /// let mut res = ureq::get("http://httpbin.org/bytes/100")
310 /// .call()?;
311 ///
312 /// let bytes = res.body_mut().read_to_vec()?;
313 /// assert_eq!(bytes.len(), 100);
314 /// # Ok::<_, ureq::Error>(())
315 /// ```
316 ///
317 /// For larger files, you must explicitly increase the limit:
318 ///
319 /// ```
320 /// // Download a larger file (50MB)
321 /// let bytes = ureq::get("http://httpbin.org/bytes/200000000")
322 /// .call()?
323 /// .body_mut()
324 /// .with_config()
325 /// .limit(50 * 1024 * 1024) // 50MB
326 /// .read_to_vec()?;
327 /// # Ok::<_, ureq::Error>(())
328 /// ```
329 pub fn read_to_vec(&mut self) -> Result<Vec<u8>, Error> {
330 self.with_config()
331 //
332 .limit(MAX_BODY_SIZE)
333 .read_to_vec()
334 }
335
336 /// Read the response from JSON.
337 ///
338 /// * Response is limited to 10MB.
339 ///
340 /// To change this default use [`Body::with_config()`].
341 ///
342 /// The returned value is something that derives [`Deserialize`](serde::Deserialize).
343 /// You might need to be explicit with which type you want. See example below.
344 ///
345 /// ```
346 /// use serde::Deserialize;
347 ///
348 /// #[derive(Deserialize)]
349 /// struct BodyType {
350 /// slideshow: BodyTypeInner,
351 /// }
352 ///
353 /// #[derive(Deserialize)]
354 /// struct BodyTypeInner {
355 /// author: String,
356 /// }
357 ///
358 /// let body = ureq::get("https://httpbin.org/json")
359 /// .call()?
360 /// .body_mut()
361 /// .read_json::<BodyType>()?;
362 ///
363 /// assert_eq!(body.slideshow.author, "Yours Truly");
364 /// # Ok::<_, ureq::Error>(())
365 /// ```
366 ///
367 /// For larger JSON files, you must explicitly increase the limit:
368 ///
369 /// ```
370 /// use serde_json::Value;
371 ///
372 /// // Parse a large JSON file (30MB)
373 /// let json: Value = ureq::get("https://httpbin.org/json")
374 /// .call()?
375 /// .body_mut()
376 /// .with_config()
377 /// .limit(30 * 1024 * 1024) // 30MB
378 /// .read_json()?;
379 /// # Ok::<_, ureq::Error>(())
380 /// ```
381 #[cfg(feature = "json")]
382 pub fn read_json<T: serde::de::DeserializeOwned>(&mut self) -> Result<T, Error> {
383 let reader = self.with_config().limit(MAX_BODY_SIZE).reader();
384 let value: T = serde_json::from_reader(reader)?;
385 Ok(value)
386 }
387
388 /// Read the body data with configuration.
389 ///
390 /// This borrows the body which gives easier use with [`http::Response::body_mut()`].
391 /// To get a non-borrowed reader use [`Body::into_with_config()`].
392 ///
393 /// # Example
394 ///
395 /// ```
396 /// let reader = ureq::get("http://httpbin.org/bytes/100")
397 /// .call()?
398 /// .body_mut()
399 /// .with_config()
400 /// // Reader will only read 50 bytes
401 /// .limit(50)
402 /// .reader();
403 /// # Ok::<_, ureq::Error>(())
404 /// ```
405 pub fn with_config(&mut self) -> BodyWithConfig {
406 let handler = (&mut self.source).into();
407 BodyWithConfig::new(handler, self.info.clone())
408 }
409
410 /// Consume self and read the body with configuration.
411 ///
412 /// This consumes self and returns a reader with `'static` lifetime.
413 ///
414 /// # Example
415 ///
416 /// ```
417 /// // Get the body out of http::Response
418 /// let (_, body) = ureq::get("http://httpbin.org/bytes/100")
419 /// .call()?
420 /// .into_parts();
421 ///
422 /// let reader = body
423 /// .into_with_config()
424 /// // Reader will only read 50 bytes
425 /// .limit(50)
426 /// .reader();
427 /// # Ok::<_, ureq::Error>(())
428 /// ```
429 ///
430 /// This limit behavior can be used to prevent a malicious server from exhausting
431 /// memory on the client machine. For example, if the machine running
432 /// ureq has 1GB of RAM, you could protect the machine by setting a smaller
433 /// limit such as 128MB. The exact number will vary by your client's download
434 /// needs, available system resources, and system utilization.
435 pub fn into_with_config(self) -> BodyWithConfig<'static> {
436 let handler = self.source.into();
437 BodyWithConfig::new(handler, self.info)
438 }
439}
440
441/// Configuration of how to read the body.
442///
443/// Obtained via one of:
444///
445/// * [Body::with_config()]
446/// * [Body::into_with_config()]
447///
448/// # Handling large responses
449///
450/// The `BodyWithConfig` is the primary way to increase the default 10MB size limit
451/// when downloading large files to memory:
452///
453/// ```
454/// // Download a 50MB file
455/// let large_data = ureq::get("http://httpbin.org/bytes/200000000")
456/// .call()?
457/// .body_mut()
458/// .with_config()
459/// .limit(50 * 1024 * 1024) // 50MB
460/// .read_to_vec()?;
461/// # Ok::<_, ureq::Error>(())
462/// ```
pub struct BodyWithConfig<'a> {
    // Borrowed or owned source of the body bytes.
    handler: BodySourceRef<'a>,
    // Response metadata shared with the originating `Body`.
    info: Arc<ResponseInfo>,
    // Max number of bytes to read; `u64::MAX` unless `limit()` is called.
    limit: u64,
    // Whether invalid utf-8 is replaced; `false` unless `lossy_utf8()` is called.
    lossy_utf8: bool,
}
469
470impl<'a> BodyWithConfig<'a> {
471 fn new(handler: BodySourceRef<'a>, info: Arc<ResponseInfo>) -> Self {
472 BodyWithConfig {
473 handler,
474 info,
475 limit: u64::MAX,
476 lossy_utf8: false,
477 }
478 }
479
480 /// Limit the response body.
481 ///
482 /// Controls how many bytes we should read before throwing an error. This is used
483 /// to ensure RAM isn't exhausted by a server sending a very large response body.
484 ///
485 /// The default limit is `u64::MAX` (unlimited).
486 pub fn limit(mut self, value: u64) -> Self {
487 self.limit = value;
488 self
489 }
490
491 /// Replace invalid utf-8 chars.
492 ///
493 /// `true` means that broken utf-8 characters are replaced by a question mark `?`
494 /// (not utf-8 replacement char). This happens after charset conversion regardless of
495 /// whether the **charset** feature is enabled or not.
496 ///
497 /// The default is `false`.
498 pub fn lossy_utf8(mut self, value: bool) -> Self {
499 self.lossy_utf8 = value;
500 self
501 }
502
503 fn do_build(self) -> BodyReader<'a> {
504 BodyReader::new(
505 LimitReader::new(self.handler, self.limit),
506 &self.info,
507 self.info.body_mode,
508 self.lossy_utf8,
509 )
510 }
511
512 /// Creates a reader.
513 ///
514 /// The reader is either shared or owned, depending on `with_config` or `into_with_config`.
515 ///
516 /// # Example of owned vs shared
517 ///
518 /// ```
519 /// // Creates an owned reader.
520 /// let reader = ureq::get("https://httpbin.org/get")
521 /// .call()?
522 /// .into_body()
523 /// // takes ownership of Body
524 /// .into_with_config()
525 /// .limit(10)
526 /// .reader();
527 /// # Ok::<_, ureq::Error>(())
528 /// ```
529 ///
530 /// ```
531 /// // Creates a shared reader.
532 /// let reader = ureq::get("https://httpbin.org/get")
533 /// .call()?
534 /// .body_mut()
535 /// // borrows Body
536 /// .with_config()
537 /// .limit(10)
538 /// .reader();
539 /// # Ok::<_, ureq::Error>(())
540 /// ```
    pub fn reader(self) -> BodyReader<'a> {
        // Public face of `do_build()`; the lifetime follows the borrowed/owned source.
        self.do_build()
    }
544
545 /// Read into string.
546 ///
    /// *Caution:* without a preceding [`limit()`][BodyWithConfig::limit], this
    /// becomes an unbounded sized `String`. A bad server could exhaust your memory.
549 ///
550 /// # Example
551 ///
552 /// ```
553 /// // Reads max 10k to a String.
554 /// let string = ureq::get("https://httpbin.org/get")
555 /// .call()?
556 /// .body_mut()
557 /// .with_config()
558 /// // Important. Limits body to 10k
559 /// .limit(10_000)
560 /// .read_to_string()?;
561 /// # Ok::<_, ureq::Error>(())
562 /// ```
563 pub fn read_to_string(self) -> Result<String, Error> {
564 use std::io::Read;
565 let mut reader = self.do_build();
566 let mut buf = String::new();
567 reader.read_to_string(&mut buf)?;
568 Ok(buf)
569 }
570
571 /// Read into vector.
572 ///
    /// *Caution:* without a preceding [`limit()`][BodyWithConfig::limit], this
    /// becomes an unbounded sized `Vec`. A bad server could exhaust your memory.
575 ///
576 /// # Example
577 ///
578 /// ```
579 /// // Reads max 10k to a Vec.
580 /// let myvec = ureq::get("https://httpbin.org/get")
581 /// .call()?
582 /// .body_mut()
583 /// .with_config()
584 /// // Important. Limits body to 10k
585 /// .limit(10_000)
586 /// .read_to_vec()?;
587 /// # Ok::<_, ureq::Error>(())
588 /// ```
589 pub fn read_to_vec(self) -> Result<Vec<u8>, Error> {
590 use std::io::Read;
591 let mut reader = self.do_build();
592 let mut buf = Vec::new();
593 reader.read_to_end(&mut buf)?;
594 Ok(buf)
595 }
596
597 /// Read JSON body.
598 ///
    /// *Caution:* without a preceding [`limit()`][BodyWithConfig::limit], this
    /// reads an unbounded amount of data. A bad server could exhaust your memory.
601 ///
602 /// # Example
603 ///
604 /// ```
605 /// use serde_json::Value;
606 ///
607 /// // Reads max 10k as a JSON value.
608 /// let json: Value = ureq::get("https://httpbin.org/get")
609 /// .call()?
610 /// .body_mut()
611 /// .with_config()
612 /// // Important. Limits body to 10k
613 /// .limit(10_000)
614 /// .read_json()?;
615 /// # Ok::<_, ureq::Error>(())
616 /// ```
617 #[cfg(feature = "json")]
618 pub fn read_json<T: serde::de::DeserializeOwned>(self) -> Result<T, Error> {
619 let reader = self.do_build();
620 let value: T = serde_json::from_reader(reader)?;
621 Ok(value)
622 }
623}
624
/// The `Content-Encoding` of a response, as far as this module distinguishes it.
#[derive(Debug, Clone, Copy)]
enum ContentEncoding {
    /// No `Content-Encoding` header (or not readable as a string).
    None,
    /// `Content-Encoding: gzip`
    Gzip,
    /// `Content-Encoding: br`
    Brotli,
    /// Any other value; the body is passed through untouched.
    Unknown,
}
632
633impl ResponseInfo {
634 pub fn new(headers: &http::HeaderMap, body_mode: BodyMode) -> Self {
635 let content_encoding = headers
636 .get(header::CONTENT_ENCODING)
637 .and_then(|v| v.to_str().ok())
638 .map(ContentEncoding::from)
639 .unwrap_or(ContentEncoding::None);
640
641 let (mime_type, charset) = headers
642 .get(header::CONTENT_TYPE)
643 .and_then(|v| v.to_str().ok())
644 .map(split_content_type)
645 .unwrap_or((None, None));
646
647 ResponseInfo {
648 content_encoding,
649 mime_type,
650 charset,
651 body_mode,
652 }
653 }
654
655 /// Returns true if the body will be decompressed (gzip or brotli).
656 pub(crate) fn is_decompressing(&self) -> bool {
657 match self.content_encoding {
658 #[cfg(feature = "gzip")]
659 ContentEncoding::Gzip => true,
660 #[cfg(feature = "brotli")]
661 ContentEncoding::Brotli => true,
662 _ => false,
663 }
664 }
665
666 /// Whether the mime type indicats text.
667 fn is_text(&self) -> bool {
668 self.mime_type
669 .as_deref()
670 .map(|s| s.starts_with("text/"))
671 .unwrap_or(false)
672 }
673}
674
/// Splits a `Content-Type` header value into `(mime_type, charset)`.
///
/// For `text/plain; charset=iso-8859-1` this returns
/// `(Some("text/plain"), Some("iso-8859-1"))`.
///
/// * Whitespace around the mime type and around parameters is ignored.
/// * The `charset` parameter name is matched case-insensitively.
/// * A quoted charset value (`charset="utf-8"`) is returned without the quotes.
/// * An empty mime type or an empty charset value is reported as `None`.
/// * If `charset` is given several times, the last non-empty one wins.
///
/// The case of the returned values is preserved as sent by the server.
fn split_content_type(content_type: &str) -> (Option<String>, Option<String>) {
    // Content-Type: text/plain; charset=iso-8859-1
    let mut parts = content_type.split(';');

    // `split` always yields at least one (possibly empty) item.
    let mime_type = parts
        .next()
        .map(str::trim)
        .filter(|mime| !mime.is_empty())
        .map(String::from);

    // Walk the parameters back to front so that the last `charset` wins.
    let charset = parts.rev().find_map(|param| {
        let (name, value) = param.split_once('=')?;
        if !name.trim().eq_ignore_ascii_case("charset") {
            return None;
        }

        let value = value.trim();
        // Strip the quotes only if the value is quoted on both ends.
        let value = value
            .strip_prefix('"')
            .and_then(|rest| rest.strip_suffix('"'))
            .unwrap_or(value);

        if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        }
    });

    (mime_type, charset)
}
694
695/// A reader of the response data.
696///
697/// 1. If `Transfer-Encoding: chunked`, the returned reader will unchunk it
698/// and any `Content-Length` header is ignored.
699/// 2. If `Content-Encoding: gzip` (or `br`) and the corresponding feature
700/// flag is enabled (**gzip** and **brotli**), decompresses the body data.
701/// 3. Given a header like `Content-Type: text/plain; charset=ISO-8859-1`
702/// and the **charset** feature enabled, will translate the body to utf-8.
///    This mechanic needs two components: a mime-type starting with `text/` and
///    a non-utf8 charset indication.
705/// 4. If `Content-Length` is set, the returned reader is limited to this byte
706/// length regardless of how many bytes the server sends.
707/// 5. If no length header, the reader is until server stream end.
708/// 6. The limit in the body method used to obtain the reader.
709///
/// Note: Readers obtained via [`Body::as_reader`] and [`Body::into_reader`] have
/// no size limit. A malicious server might return enough bytes to exhaust
/// available memory. If you're making requests to untrusted servers, you
/// should obtain the reader via [`Body::with_config`] and set a suitable
/// [`limit()`][BodyWithConfig::limit].
715///
716/// # Example
717///
718/// ```
719/// use std::io::Read;
720/// let mut res = ureq::get("http://httpbin.org/bytes/100")
721/// .call()?;
722///
723/// assert!(res.headers().contains_key("Content-Length"));
724/// let len: usize = res.headers().get("Content-Length")
725/// .unwrap().to_str().unwrap().parse().unwrap();
726///
727/// let mut bytes: Vec<u8> = Vec::with_capacity(len);
728/// res.body_mut().as_reader()
729/// .read_to_end(&mut bytes)?;
730///
731/// assert_eq!(bytes.len(), len);
732/// # Ok::<_, ureq::Error>(())
733/// ```
pub struct BodyReader<'a> {
    // Decoding layers from the inside out: size limit, content decoding
    // (gzip/brotli), charset conversion and optional lossy utf-8 replacement.
    reader: MaybeLossyDecoder<CharsetDecoder<ContentDecoder<LimitReader<BodySourceRef<'a>>>>>,
    // If this reader is used as SendBody for another request, this
    // body mode can indicate the content-length. Gzip, charset etc
    // would mean input is not same as output.
    outgoing_body_mode: BodyMode,
}
741
742impl<'a> BodyReader<'a> {
743 fn new(
744 reader: LimitReader<BodySourceRef<'a>>,
745 info: &ResponseInfo,
746 incoming_body_mode: BodyMode,
747 lossy_utf8: bool,
748 ) -> BodyReader<'a> {
749 // This is outgoing body_mode in case we are using the BodyReader as a send body
750 // in a proxy situation.
751 let mut outgoing_body_mode = incoming_body_mode;
752
753 let reader = match info.content_encoding {
754 ContentEncoding::None | ContentEncoding::Unknown => ContentDecoder::PassThrough(reader),
755 #[cfg(feature = "gzip")]
756 ContentEncoding::Gzip => {
757 debug!("Decoding gzip");
758 outgoing_body_mode = BodyMode::Chunked;
759 ContentDecoder::Gzip(Box::new(gzip::GzipDecoder::new(reader)))
760 }
761 #[cfg(not(feature = "gzip"))]
762 ContentEncoding::Gzip => ContentDecoder::PassThrough(reader),
763 #[cfg(feature = "brotli")]
764 ContentEncoding::Brotli => {
765 debug!("Decoding brotli");
766 outgoing_body_mode = BodyMode::Chunked;
767 ContentDecoder::Brotli(Box::new(brotli::BrotliDecoder::new(reader)))
768 }
769 #[cfg(not(feature = "brotli"))]
770 ContentEncoding::Brotli => ContentDecoder::PassThrough(reader),
771 };
772
773 let reader = if info.is_text() {
774 charset_decoder(
775 reader,
776 info.mime_type.as_deref(),
777 info.charset.as_deref(),
778 &mut outgoing_body_mode,
779 )
780 } else {
781 CharsetDecoder::PassThrough(reader)
782 };
783
784 let reader = if info.is_text() && lossy_utf8 {
785 MaybeLossyDecoder::Lossy(LossyUtf8Reader::new(reader))
786 } else {
787 MaybeLossyDecoder::PassThrough(reader)
788 };
789
790 BodyReader {
791 outgoing_body_mode,
792 reader,
793 }
794 }
795
    /// The body mode to use when this reader is sent on as the body of another request.
    pub(crate) fn body_mode(&self) -> BodyMode {
        self.outgoing_body_mode
    }
799}
800
801#[allow(unused)]
802fn charset_decoder<R: io::Read>(
803 reader: R,
804 mime_type: Option<&str>,
805 charset: Option<&str>,
806 body_mode: &mut BodyMode,
807) -> CharsetDecoder<R> {
808 #[cfg(feature = "charset")]
809 {
810 use encoding_rs::{Encoding, UTF_8};
811
812 let from = charset
813 .and_then(|c| Encoding::for_label(c.as_bytes()))
814 .unwrap_or(UTF_8);
815
816 if from == UTF_8 {
817 // Do nothing
818 CharsetDecoder::PassThrough(reader)
819 } else {
820 debug!("Decoding charset {}", from.name());
821 *body_mode = BodyMode::Chunked;
822 CharsetDecoder::Decoder(self::charset::CharCodec::new(reader, from, UTF_8))
823 }
824 }
825
826 #[cfg(not(feature = "charset"))]
827 {
828 CharsetDecoder::PassThrough(reader)
829 }
830}
831
/// Outermost reader layer: optionally routes data through a [`LossyUtf8Reader`].
enum MaybeLossyDecoder<R> {
    /// Data is read through a `LossyUtf8Reader`.
    Lossy(LossyUtf8Reader<R>),
    /// Data is forwarded from the inner reader untouched.
    PassThrough(R),
}
836
impl<R: io::Read> io::Read for MaybeLossyDecoder<R> {
    /// Delegates the read to whichever variant is active.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self {
            MaybeLossyDecoder::Lossy(r) => r.read(buf),
            MaybeLossyDecoder::PassThrough(r) => r.read(buf),
        }
    }
}
845
impl<'a> io::Read for BodyReader<'a> {
    /// Reads from the outermost decoding layer.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.reader.read(buf)
    }
}
851
/// Reader layer that optionally converts the body charset.
enum CharsetDecoder<R> {
    /// Data is read through a `CharCodec` (only with the **charset** feature).
    #[cfg(feature = "charset")]
    Decoder(charset::CharCodec<R>),
    /// Data is forwarded from the inner reader untouched.
    PassThrough(R),
}
857
impl<R: io::Read> io::Read for CharsetDecoder<R> {
    /// Delegates the read to whichever variant is active.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self {
            #[cfg(feature = "charset")]
            CharsetDecoder::Decoder(v) => v.read(buf),
            CharsetDecoder::PassThrough(v) => v.read(buf),
        }
    }
}
867
/// Reader layer that optionally decompresses the body.
enum ContentDecoder<R: io::Read> {
    /// Gzip decompression (only with the **gzip** feature).
    #[cfg(feature = "gzip")]
    Gzip(Box<gzip::GzipDecoder<R>>),
    /// Brotli decompression (only with the **brotli** feature).
    #[cfg(feature = "brotli")]
    Brotli(Box<brotli::BrotliDecoder<R>>),
    /// Data is forwarded from the inner reader untouched.
    PassThrough(R),
}
875
impl<R: io::Read> io::Read for ContentDecoder<R> {
    /// Delegates the read to whichever variant is active.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self {
            #[cfg(feature = "gzip")]
            ContentDecoder::Gzip(v) => v.read(buf),
            #[cfg(feature = "brotli")]
            ContentDecoder::Brotli(v) => v.read(buf),
            ContentDecoder::PassThrough(v) => v.read(buf),
        }
    }
}
887
impl fmt::Debug for Body {
    /// Prints only the type name; no fields are included in the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Body").finish()
    }
}
893
894impl From<&str> for ContentEncoding {
895 fn from(s: &str) -> Self {
896 match s {
897 "gzip" => ContentEncoding::Gzip,
898 "br" => ContentEncoding::Brotli,
899 _ => {
900 debug!("Unknown content-encoding: {}", s);
901 ContentEncoding::Unknown
902 }
903 }
904 }
905}
906
impl<'a> From<&'a mut BodyDataSource> for BodySourceRef<'a> {
    /// Borrows the data source, producing one of the `*Shared` variants.
    fn from(value: &'a mut BodyDataSource) -> Self {
        match value {
            BodyDataSource::Handler(v) => Self::HandlerShared(v),
            BodyDataSource::Reader(v) => Self::ReaderShared(v),
        }
    }
}
915
impl From<BodyDataSource> for BodySourceRef<'static> {
    /// Takes over the data source, producing one of the `*Owned` variants.
    fn from(value: BodyDataSource) -> Self {
        match value {
            BodyDataSource::Handler(v) => Self::HandlerOwned(v),
            BodyDataSource::Reader(v) => Self::ReaderOwned(v),
        }
    }
}
924
/// A borrowed (`*Shared`) or owned (`*Owned`) view of a `BodyDataSource`.
///
/// The borrowed variants are tied to the lifetime `'a` of the body they come
/// from; the owned variants are created with lifetime `'static`.
pub(crate) enum BodySourceRef<'a> {
    /// Mutable borrow of a `BodyHandler`.
    HandlerShared(&'a mut BodyHandler),
    /// Owned, boxed `BodyHandler`.
    HandlerOwned(Box<BodyHandler>),
    /// Mutable borrow of an arbitrary reader.
    ReaderShared(&'a mut (dyn io::Read + Send + Sync)),
    /// Owned, boxed arbitrary reader.
    ReaderOwned(Box<dyn io::Read + Send + Sync>),
}
931
impl<'a> io::Read for BodySourceRef<'a> {
    /// Delegates the read to the underlying handler or reader.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self {
            BodySourceRef::HandlerShared(v) => v.read(buf),
            BodySourceRef::HandlerOwned(v) => v.read(buf),
            BodySourceRef::ReaderShared(v) => v.read(buf),
            BodySourceRef::ReaderOwned(v) => v.read(buf),
        }
    }
}
942
// Tests run against handlers registered with `set_handler`, so they need
// the `_test` feature.
#[cfg(all(test, feature = "_test"))]
mod test {
    use crate::Error;
    use crate::test::init_test_log;
    use crate::transport::set_handler;

    /// A `content-type` without parameters gives a mime type and no charset.
    #[test]
    fn content_type_without_charset() {
        init_test_log();
        set_handler("/get", 200, &[("content-type", "application/json")], b"{}");

        let res = crate::get("https://my.test/get").call().unwrap();
        assert_eq!(res.body().mime_type(), Some("application/json"));
        assert!(res.body().charset().is_none());
    }

    /// A `charset=` parameter is split off from the mime type.
    #[test]
    fn content_type_with_charset() {
        init_test_log();
        set_handler(
            "/get",
            200,
            &[("content-type", "application/json; charset=iso-8859-4")],
            b"{}",
        );

        let res = crate::get("https://my.test/get").call().unwrap();
        assert_eq!(res.body().mime_type(), Some("application/json"));
        assert_eq!(res.body().charset(), Some("iso-8859-4"));
    }

    /// A chunked response is unchunked when read to a string.
    #[test]
    fn chunked_transfer() {
        init_test_log();

        // Two chunks (3 and 0xb = 11 bytes) followed by the terminating 0-chunk.
        let s = "3\r\n\
            hel\r\n\
            b\r\n\
            lo world!!!\r\n\
            0\r\n\
            \r\n";

        set_handler(
            "/get",
            200,
            &[("transfer-encoding", "chunked")],
            s.as_bytes(),
        );

        let mut res = crate::get("https://my.test/get").call().unwrap();
        let b = res.body_mut().read_to_string().unwrap();
        assert_eq!(b, "hello world!!!");
    }

    /// A 64 KiB header value makes the call fail with `Error::LargeResponseHeader`.
    #[test]
    fn large_response_header() {
        init_test_log();
        set_handler(
            "/get",
            200,
            &[("content-type", &"b".repeat(64 * 1024))],
            b"{}",
        );

        let err = crate::get("https://my.test/get").call().unwrap_err();
        assert!(matches!(err, Error::LargeResponseHeader(_, _)));
    }
}
1010}