Skip to main content

ureq/body/
mod.rs

1use std::fmt;
2use std::io;
3use std::sync::Arc;
4
5pub use build::BodyBuilder;
6use ureq_proto::BodyMode;
7use ureq_proto::http::header;
8
9use crate::Error;
10use crate::http;
11use crate::run::BodyHandler;
12
13use self::limit::LimitReader;
14use self::lossy::LossyUtf8Reader;
15
16mod build;
17mod limit;
18mod lossy;
19
20#[cfg(feature = "charset")]
21mod charset;
22
23#[cfg(feature = "gzip")]
24mod gzip;
25
26#[cfg(feature = "brotli")]
27mod brotli;
28
/// Byte cap (10 MiB) that the convenience readers `read_to_string()`,
/// `read_to_vec()` and `read_json()` on `Body` apply by default.
const MAX_BODY_SIZE: u64 = 10 << 20;
31
32/// A response body returned as [`http::Response<Body>`].
33///
34/// # Default size limit
35///
36/// Methods like `read_to_string()`, `read_to_vec()`, and `read_json()` have a **default 10MB limit**
37/// to prevent memory exhaustion. To download larger files, use `with_config().limit(new_size)`:
38///
39/// ```
40/// // Download a 20MB file
41/// let bytes = ureq::get("http://httpbin.org/bytes/200000000")
42///     .call()?
43///     .body_mut()
44///     .with_config()
45///     .limit(20 * 1024 * 1024) // 20MB
46///     .read_to_vec()?;
47/// # Ok::<_, ureq::Error>(())
48/// ```
49///
50/// # Body lengths
51///
52/// HTTP/1.1 has two major modes of transferring body data. Either a `Content-Length`
53/// header defines exactly how many bytes to transfer, or `Transfer-Encoding: chunked`
54/// facilitates a streaming style when the size is not known up front.
55///
56/// To protect against a problem called [request smuggling], ureq has heuristics for
57/// how to interpret a server sending both `Transfer-Encoding` and `Content-Length` headers.
58///
59/// 1. `chunked` takes precedence if both headers are present (not for HTTP/1.0)
60/// 2. `content-length` is used if there is no chunked
61/// 3. If there are no headers, fall back on "close delimited" meaning the socket
62///    must close to end the body
63///
64/// When a `Content-Length` header is used, ureq will ensure the received body is _EXACTLY_
65/// as many bytes as declared (it cannot be less). This mechanic is in `ureq-proto`
66/// and is different to the [`BodyWithConfig::limit()`] below.
67///
68/// # Pool reuse
69///
70/// To return a connection (aka [`Transport`][crate::unversioned::transport::Transport])
71/// to the Agent's pool, the body must be read to end. If [`BodyWithConfig::limit()`] is set
72/// to a shorter size than the actual response body, the connection will not be reused.
73///
74/// # Example
75///
76/// ```
77/// use std::io::Read;
78/// let mut res = ureq::get("http://httpbin.org/bytes/100")
79///     .call()?;
80///
81/// assert!(res.headers().contains_key("Content-Length"));
82/// let len: usize = res.headers().get("Content-Length")
83///     .unwrap().to_str().unwrap().parse().unwrap();
84///
85/// let mut bytes: Vec<u8> = Vec::with_capacity(len);
86/// res.body_mut().as_reader()
87///     .read_to_end(&mut bytes)?;
88///
89/// assert_eq!(bytes.len(), len);
90/// # Ok::<_, ureq::Error>(())
91/// ```
92///
93/// [request smuggling]: https://en.wikipedia.org/wiki/HTTP_request_smuggling
94pub struct Body {
95    source: BodyDataSource,
96    info: Arc<ResponseInfo>,
97}
98
99enum BodyDataSource {
100    Handler(Box<BodyHandler>),
101    Reader(Box<dyn io::Read + Send + Sync>),
102}
103
104#[derive(Clone)]
105pub(crate) struct ResponseInfo {
106    content_encoding: ContentEncoding,
107    mime_type: Option<String>,
108    charset: Option<String>,
109    body_mode: BodyMode,
110}
111
112impl Body {
113    /// Builder for creating a body
114    ///
115    /// This is useful for testing, or for [`Middleware`][crate::middleware::Middleware] that
116    /// returns another body than the requested one.
117    pub fn builder() -> BodyBuilder {
118        BodyBuilder::new()
119    }
120
121    pub(crate) fn new(handler: BodyHandler, info: ResponseInfo) -> Self {
122        Body {
123            source: BodyDataSource::Handler(Box::new(handler)),
124            info: Arc::new(info),
125        }
126    }
127
128    /// The mime-type of the `content-type` header.
129    ///
130    /// For the below header, we would get `Some("text/plain")`:
131    ///
132    /// ```text
133    ///     Content-Type: text/plain; charset=iso-8859-1
134    /// ```
135    ///
136    /// *Caution:* A bad server might set `Content-Type` to one thing and send
137    /// something else. There is no way ureq can verify this.
138    ///
139    /// # Example
140    ///
141    /// ```
142    /// let res = ureq::get("https://www.google.com/")
143    ///     .call()?;
144    ///
145    /// assert_eq!(res.body().mime_type(), Some("text/html"));
146    /// # Ok::<_, ureq::Error>(())
147    /// ```
148    pub fn mime_type(&self) -> Option<&str> {
149        self.info.mime_type.as_deref()
150    }
151
152    /// The charset of the `content-type` header.
153    ///
154    /// For the below header, we would get `Some("iso-8859-1")`:
155    ///
156    /// ```text
157    ///     Content-Type: text/plain; charset=iso-8859-1
158    /// ```
159    ///
160    /// *Caution:* A bad server might set `Content-Type` to one thing and send
161    /// something else. There is no way ureq can verify this.
162    ///
163    /// # Example
164    ///
165    /// ```
166    /// let res = ureq::get("https://www.google.com/")
167    ///     .call()?;
168    ///
169    /// assert_eq!(res.body().charset(), Some("ISO-8859-1"));
170    /// # Ok::<_, ureq::Error>(())
171    /// ```
172    pub fn charset(&self) -> Option<&str> {
173        self.info.charset.as_deref()
174    }
175
176    /// The content length of the body.
177    ///
178    /// This is the value of the `Content-Length` header, if there is one. For chunked
179    /// responses (`Transfer-Encoding: chunked`) , this will be `None`. Similarly for
180    /// HTTP/1.0 without a `Content-Length` header, the response is close delimited,
181    /// which means the length is unknown.
182    ///
183    /// A bad server might set `Content-Length` to one thing and send something else.
184    /// ureq will double check this, see section on body length heuristics.
185    ///
186    /// # Example
187    ///
188    /// ```
189    /// let res = ureq::get("https://httpbin.org/bytes/100")
190    ///     .call()?;
191    ///
192    /// assert_eq!(res.body().content_length(), Some(100));
193    /// # Ok::<_, ureq::Error>(())
194    /// ```
195    pub fn content_length(&self) -> Option<u64> {
196        // After transparent decompression, the original Content-Length no longer
197        // reflects the actual body size, so we return None.
198        if self.info.is_decompressing() {
199            return None;
200        }
201        match self.info.body_mode {
202            BodyMode::NoBody => None,
203            BodyMode::LengthDelimited(v) => Some(v),
204            BodyMode::Chunked => None,
205            BodyMode::CloseDelimited => None,
206        }
207    }
208
209    /// Handle this body as a shared `impl Read` of the body.
210    ///
211    /// This is the regular API which goes via [`http::Response::body_mut()`] to get a
212    /// mut reference to the `Body`, and then use `as_reader()`. It is also possible to
213    /// get a non-shared, owned reader via [`Body::into_reader()`].
214    ///
215    /// * Reader is not limited by default. That means a malicious server could
216    ///   exhaust all avaliable memory on your client machine.
217    ///   To set a limit use [`Body::into_with_config()`].
218    /// * Reader will error if `Content-Length` is set, but the connection is closed
219    ///   before all bytes are received.
220    ///
221    /// # Example
222    ///
223    /// ```
224    /// use std::io::Read;
225    ///
226    /// let mut res = ureq::get("http://httpbin.org/bytes/100")
227    ///     .call()?;
228    ///
229    /// let mut bytes: Vec<u8> = Vec::with_capacity(1000);
230    /// res.body_mut().as_reader()
231    ///     .read_to_end(&mut bytes)?;
232    /// # Ok::<_, ureq::Error>(())
233    /// ```
234    pub fn as_reader(&mut self) -> BodyReader {
235        self.with_config().reader()
236    }
237
238    /// Turn this response into an owned `impl Read` of the body.
239    ///
240    /// Sometimes it might be useful to disconnect the body reader from the body.
241    /// The reader returned by [`Body::as_reader()`] borrows the body, while this
242    /// variant consumes the body and turns it into a reader with lifetime `'static`.
243    /// The reader can for instance be sent to another thread.
244    ///
245    /// * Reader is not limited by default. That means a malicious server could
246    ///   exhaust all avaliable memory on your client machine.
247    ///   To set a limit use [`Body::into_with_config()`].
248    /// * Reader will error if `Content-Length` is set, but the connection is closed
249    ///   before all bytes are received.
250    ///
251    /// ```
252    /// use std::io::Read;
253    ///
254    /// let res = ureq::get("http://httpbin.org/bytes/100")
255    ///     .call()?;
256    ///
257    /// let (_, body) = res.into_parts();
258    ///
259    /// let mut bytes: Vec<u8> = Vec::with_capacity(1000);
260    /// body.into_reader()
261    ///     .read_to_end(&mut bytes)?;
262    /// # Ok::<_, ureq::Error>(())
263    /// ```
264    pub fn into_reader(self) -> BodyReader<'static> {
265        self.into_with_config().reader()
266    }
267
268    /// Read the response as a string.
269    ///
270    /// * Response is limited to 10MB
271    /// * Replaces incorrect utf-8 chars to `?`
272    ///
273    /// To change these defaults use [`Body::with_config()`].
274    ///
275    /// ```
276    /// let mut res = ureq::get("http://httpbin.org/robots.txt")
277    ///     .call()?;
278    ///
279    /// let s = res.body_mut().read_to_string()?;
280    /// assert_eq!(s, "User-agent: *\nDisallow: /deny\n");
281    /// # Ok::<_, ureq::Error>(())
282    /// ```
283    ///
284    /// For larger text files, you must explicitly increase the limit:
285    ///
286    /// ```
287    /// // Read a large text file (25MB)
288    /// let text = ureq::get("http://httpbin.org/get")
289    ///     .call()?
290    ///     .body_mut()
291    ///     .with_config()
292    ///     .limit(25 * 1024 * 1024) // 25MB
293    ///     .read_to_string()?;
294    /// # Ok::<_, ureq::Error>(())
295    /// ```
296    pub fn read_to_string(&mut self) -> Result<String, Error> {
297        self.with_config()
298            .limit(MAX_BODY_SIZE)
299            .lossy_utf8(true)
300            .read_to_string()
301    }
302
303    /// Read the response to a vec.
304    ///
305    /// * Response is limited to 10MB.
306    ///
307    /// To change this default use [`Body::with_config()`].
308    /// ```
309    /// let mut res = ureq::get("http://httpbin.org/bytes/100")
310    ///     .call()?;
311    ///
312    /// let bytes = res.body_mut().read_to_vec()?;
313    /// assert_eq!(bytes.len(), 100);
314    /// # Ok::<_, ureq::Error>(())
315    /// ```
316    ///
317    /// For larger files, you must explicitly increase the limit:
318    ///
319    /// ```
320    /// // Download a larger file (50MB)
321    /// let bytes = ureq::get("http://httpbin.org/bytes/200000000")
322    ///     .call()?
323    ///     .body_mut()
324    ///     .with_config()
325    ///     .limit(50 * 1024 * 1024) // 50MB
326    ///     .read_to_vec()?;
327    /// # Ok::<_, ureq::Error>(())
328    /// ```
329    pub fn read_to_vec(&mut self) -> Result<Vec<u8>, Error> {
330        self.with_config()
331            //
332            .limit(MAX_BODY_SIZE)
333            .read_to_vec()
334    }
335
336    /// Read the response from JSON.
337    ///
338    /// * Response is limited to 10MB.
339    ///
340    /// To change this default use [`Body::with_config()`].
341    ///
342    /// The returned value is something that derives [`Deserialize`](serde::Deserialize).
343    /// You might need to be explicit with which type you want. See example below.
344    ///
345    /// ```
346    /// use serde::Deserialize;
347    ///
348    /// #[derive(Deserialize)]
349    /// struct BodyType {
350    ///   slideshow: BodyTypeInner,
351    /// }
352    ///
353    /// #[derive(Deserialize)]
354    /// struct BodyTypeInner {
355    ///   author: String,
356    /// }
357    ///
358    /// let body = ureq::get("https://httpbin.org/json")
359    ///     .call()?
360    ///     .body_mut()
361    ///     .read_json::<BodyType>()?;
362    ///
363    /// assert_eq!(body.slideshow.author, "Yours Truly");
364    /// # Ok::<_, ureq::Error>(())
365    /// ```
366    ///
367    /// For larger JSON files, you must explicitly increase the limit:
368    ///
369    /// ```
370    /// use serde_json::Value;
371    ///
372    /// // Parse a large JSON file (30MB)
373    /// let json: Value = ureq::get("https://httpbin.org/json")
374    ///     .call()?
375    ///     .body_mut()
376    ///     .with_config()
377    ///     .limit(30 * 1024 * 1024) // 30MB
378    ///     .read_json()?;
379    /// # Ok::<_, ureq::Error>(())
380    /// ```
381    #[cfg(feature = "json")]
382    pub fn read_json<T: serde::de::DeserializeOwned>(&mut self) -> Result<T, Error> {
383        let reader = self.with_config().limit(MAX_BODY_SIZE).reader();
384        let value: T = serde_json::from_reader(reader)?;
385        Ok(value)
386    }
387
388    /// Read the body data with configuration.
389    ///
390    /// This borrows the body which gives easier use with [`http::Response::body_mut()`].
391    /// To get a non-borrowed reader use [`Body::into_with_config()`].
392    ///
393    /// # Example
394    ///
395    /// ```
396    /// let reader = ureq::get("http://httpbin.org/bytes/100")
397    ///     .call()?
398    ///     .body_mut()
399    ///     .with_config()
400    ///     // Reader will only read 50 bytes
401    ///     .limit(50)
402    ///     .reader();
403    /// # Ok::<_, ureq::Error>(())
404    /// ```
405    pub fn with_config(&mut self) -> BodyWithConfig {
406        let handler = (&mut self.source).into();
407        BodyWithConfig::new(handler, self.info.clone())
408    }
409
410    /// Consume self and read the body with configuration.
411    ///
412    /// This consumes self and returns a reader with `'static` lifetime.
413    ///
414    /// # Example
415    ///
416    /// ```
417    /// // Get the body out of http::Response
418    /// let (_, body) = ureq::get("http://httpbin.org/bytes/100")
419    ///     .call()?
420    ///     .into_parts();
421    ///
422    /// let reader = body
423    ///     .into_with_config()
424    ///     // Reader will only read 50 bytes
425    ///     .limit(50)
426    ///     .reader();
427    /// # Ok::<_, ureq::Error>(())
428    /// ```
429    ///
430    /// This limit behavior can be used to prevent a malicious server from exhausting
431    /// memory on the client machine. For example, if the machine running
432    /// ureq has 1GB of RAM, you could protect the machine by setting a smaller
433    /// limit such as 128MB. The exact number will vary by your client's download
434    /// needs, available system resources, and system utilization.
435    pub fn into_with_config(self) -> BodyWithConfig<'static> {
436        let handler = self.source.into();
437        BodyWithConfig::new(handler, self.info)
438    }
439}
440
441/// Configuration of how to read the body.
442///
443/// Obtained via one of:
444///
445/// * [Body::with_config()]
446/// * [Body::into_with_config()]
447///
448/// # Handling large responses
449///
450/// The `BodyWithConfig` is the primary way to increase the default 10MB size limit
451/// when downloading large files to memory:
452///
453/// ```
454/// // Download a 50MB file
455/// let large_data = ureq::get("http://httpbin.org/bytes/200000000")
456///     .call()?
457///     .body_mut()
458///     .with_config()
459///     .limit(50 * 1024 * 1024) // 50MB
460///     .read_to_vec()?;
461/// # Ok::<_, ureq::Error>(())
462/// ```
463pub struct BodyWithConfig<'a> {
464    handler: BodySourceRef<'a>,
465    info: Arc<ResponseInfo>,
466    limit: u64,
467    lossy_utf8: bool,
468}
469
470impl<'a> BodyWithConfig<'a> {
471    fn new(handler: BodySourceRef<'a>, info: Arc<ResponseInfo>) -> Self {
472        BodyWithConfig {
473            handler,
474            info,
475            limit: u64::MAX,
476            lossy_utf8: false,
477        }
478    }
479
480    /// Limit the response body.
481    ///
482    /// Controls how many bytes we should read before throwing an error. This is used
483    /// to ensure RAM isn't exhausted by a server sending a very large response body.
484    ///
485    /// The default limit is `u64::MAX` (unlimited).
486    pub fn limit(mut self, value: u64) -> Self {
487        self.limit = value;
488        self
489    }
490
491    /// Replace invalid utf-8 chars.
492    ///
493    /// `true` means that broken utf-8 characters are replaced by a question mark `?`
494    /// (not utf-8 replacement char). This happens after charset conversion regardless of
495    /// whether the **charset** feature is enabled or not.
496    ///
497    /// The default is `false`.
498    pub fn lossy_utf8(mut self, value: bool) -> Self {
499        self.lossy_utf8 = value;
500        self
501    }
502
503    fn do_build(self) -> BodyReader<'a> {
504        BodyReader::new(
505            LimitReader::new(self.handler, self.limit),
506            &self.info,
507            self.info.body_mode,
508            self.lossy_utf8,
509        )
510    }
511
512    /// Creates a reader.
513    ///
514    /// The reader is either shared or owned, depending on `with_config` or `into_with_config`.
515    ///
516    /// # Example of owned vs shared
517    ///
518    /// ```
519    /// // Creates an owned reader.
520    /// let reader = ureq::get("https://httpbin.org/get")
521    ///     .call()?
522    ///     .into_body()
523    ///     // takes ownership of Body
524    ///     .into_with_config()
525    ///     .limit(10)
526    ///     .reader();
527    /// # Ok::<_, ureq::Error>(())
528    /// ```
529    ///
530    /// ```
531    /// // Creates a shared reader.
532    /// let reader = ureq::get("https://httpbin.org/get")
533    ///     .call()?
534    ///     .body_mut()
535    ///     // borrows Body
536    ///     .with_config()
537    ///     .limit(10)
538    ///     .reader();
539    /// # Ok::<_, ureq::Error>(())
540    /// ```
541    pub fn reader(self) -> BodyReader<'a> {
542        self.do_build()
543    }
544
545    /// Read into string.
546    ///
547    /// *Caution:* without a preceeding [`limit()`][BodyWithConfig::limit], this
548    /// becomes an unbounded sized `String`. A bad server could exhaust your memory.
549    ///
550    /// # Example
551    ///
552    /// ```
553    /// // Reads max 10k to a String.
554    /// let string = ureq::get("https://httpbin.org/get")
555    ///     .call()?
556    ///     .body_mut()
557    ///     .with_config()
558    ///     // Important. Limits body to 10k
559    ///     .limit(10_000)
560    ///     .read_to_string()?;
561    /// # Ok::<_, ureq::Error>(())
562    /// ```
563    pub fn read_to_string(self) -> Result<String, Error> {
564        use std::io::Read;
565        let mut reader = self.do_build();
566        let mut buf = String::new();
567        reader.read_to_string(&mut buf)?;
568        Ok(buf)
569    }
570
571    /// Read into vector.
572    ///
573    /// *Caution:* without a preceeding [`limit()`][BodyWithConfig::limit], this
574    /// becomes an unbounded sized `Vec`. A bad server could exhaust your memory.
575    ///
576    /// # Example
577    ///
578    /// ```
579    /// // Reads max 10k to a Vec.
580    /// let myvec = ureq::get("https://httpbin.org/get")
581    ///     .call()?
582    ///     .body_mut()
583    ///     .with_config()
584    ///     // Important. Limits body to 10k
585    ///     .limit(10_000)
586    ///     .read_to_vec()?;
587    /// # Ok::<_, ureq::Error>(())
588    /// ```
589    pub fn read_to_vec(self) -> Result<Vec<u8>, Error> {
590        use std::io::Read;
591        let mut reader = self.do_build();
592        let mut buf = Vec::new();
593        reader.read_to_end(&mut buf)?;
594        Ok(buf)
595    }
596
597    /// Read JSON body.
598    ///
599    /// *Caution:* without a preceeding [`limit()`][BodyWithConfig::limit], this
600    /// becomes an unbounded sized `String`. A bad server could exhaust your memory.
601    ///
602    /// # Example
603    ///
604    /// ```
605    /// use serde_json::Value;
606    ///
607    /// // Reads max 10k as a JSON value.
608    /// let json: Value  = ureq::get("https://httpbin.org/get")
609    ///     .call()?
610    ///     .body_mut()
611    ///     .with_config()
612    ///     // Important. Limits body to 10k
613    ///     .limit(10_000)
614    ///     .read_json()?;
615    /// # Ok::<_, ureq::Error>(())
616    /// ```
617    #[cfg(feature = "json")]
618    pub fn read_json<T: serde::de::DeserializeOwned>(self) -> Result<T, Error> {
619        let reader = self.do_build();
620        let value: T = serde_json::from_reader(reader)?;
621        Ok(value)
622    }
623}
624
/// The `Content-Encoding` of a response, as far as this module tells values apart.
#[derive(Debug, Clone, Copy)]
enum ContentEncoding {
    /// No usable `Content-Encoding` header value.
    None,
    /// The `gzip` coding.
    Gzip,
    /// The `br` coding.
    Brotli,
    /// Any value that is not recognized.
    Unknown,
}
632
633impl ResponseInfo {
634    pub fn new(headers: &http::HeaderMap, body_mode: BodyMode) -> Self {
635        let content_encoding = headers
636            .get(header::CONTENT_ENCODING)
637            .and_then(|v| v.to_str().ok())
638            .map(ContentEncoding::from)
639            .unwrap_or(ContentEncoding::None);
640
641        let (mime_type, charset) = headers
642            .get(header::CONTENT_TYPE)
643            .and_then(|v| v.to_str().ok())
644            .map(split_content_type)
645            .unwrap_or((None, None));
646
647        ResponseInfo {
648            content_encoding,
649            mime_type,
650            charset,
651            body_mode,
652        }
653    }
654
655    /// Returns true if the body will be decompressed (gzip or brotli).
656    pub(crate) fn is_decompressing(&self) -> bool {
657        match self.content_encoding {
658            #[cfg(feature = "gzip")]
659            ContentEncoding::Gzip => true,
660            #[cfg(feature = "brotli")]
661            ContentEncoding::Brotli => true,
662            _ => false,
663        }
664    }
665
666    /// Whether the mime type indicats text.
667    fn is_text(&self) -> bool {
668        self.mime_type
669            .as_deref()
670            .map(|s| s.starts_with("text/"))
671            .unwrap_or(false)
672    }
673}
674
/// Splits a `Content-Type` header value into `(mime_type, charset)`.
///
/// * The mime type is the text before the first `;`, with surrounding whitespace
///   removed. An empty mime type is reported as `None`.
/// * The charset is the value of the `charset` parameter. The parameter name is
///   matched case-insensitively, and one pair of surrounding double quotes is
///   removed from the value. The value's own letter case is preserved. If the
///   parameter is repeated, the last non-empty value wins.
///
/// Parameters other than `charset` are ignored.
fn split_content_type(content_type: &str) -> (Option<String>, Option<String>) {
    // Content-Type: text/plain; charset=iso-8859-1
    let mut segments = content_type.split(';');

    // `split` always yields at least one segment, so a missing mime type shows up
    // as an empty (or whitespace-only) first segment, not as an exhausted iterator.
    let mime_type = segments
        .next()
        .map(str::trim)
        .filter(|mime| !mime.is_empty())
        .map(str::to_string);

    let mut charset = None;

    for param in segments {
        let Some((name, value)) = param.split_once('=') else {
            continue;
        };

        if !name.trim().eq_ignore_ascii_case("charset") {
            continue;
        }

        let value = value.trim();
        // The value may be a quoted-string: charset="utf-8"
        let value = value
            .strip_prefix('"')
            .and_then(|v| v.strip_suffix('"'))
            .unwrap_or(value);

        if !value.is_empty() {
            charset = Some(value.to_string());
        }
    }

    (mime_type, charset)
}
694
695/// A reader of the response data.
696///
697/// 1. If `Transfer-Encoding: chunked`, the returned reader will unchunk it
698///    and any `Content-Length` header is ignored.
699/// 2. If `Content-Encoding: gzip` (or `br`) and the corresponding feature
700///    flag is enabled (**gzip** and **brotli**), decompresses the body data.
701/// 3. Given a header like `Content-Type: text/plain; charset=ISO-8859-1`
702///    and the **charset** feature enabled, will translate the body to utf-8.
703///    This mechanic needs two components: a mime-type starting with `text/` and
704///    a non-utf8 charset indication.
705/// 4. If `Content-Length` is set, the returned reader is limited to this byte
706///    length regardless of how many bytes the server sends.
707/// 5. If no length header, the reader is until server stream end.
708/// 6. The limit in the body method used to obtain the reader.
709///
710/// Note: The reader is also limited by the [`Body::as_reader`] and
711/// [`Body::into_reader`] calls. If that limit is set very high, a malicious
712/// server might return enough bytes to exhaust available memory. If you're
713/// making requests to untrusted servers, you should set that
714/// limit accordingly.
715///
716/// # Example
717///
718/// ```
719/// use std::io::Read;
720/// let mut res = ureq::get("http://httpbin.org/bytes/100")
721///     .call()?;
722///
723/// assert!(res.headers().contains_key("Content-Length"));
724/// let len: usize = res.headers().get("Content-Length")
725///     .unwrap().to_str().unwrap().parse().unwrap();
726///
727/// let mut bytes: Vec<u8> = Vec::with_capacity(len);
728/// res.body_mut().as_reader()
729///     .read_to_end(&mut bytes)?;
730///
731/// assert_eq!(bytes.len(), len);
732/// # Ok::<_, ureq::Error>(())
733/// ```
734pub struct BodyReader<'a> {
735    reader: MaybeLossyDecoder<CharsetDecoder<ContentDecoder<LimitReader<BodySourceRef<'a>>>>>,
736    // If this reader is used as SendBody for another request, this
737    // body mode can indiciate the content-length. Gzip, charset etc
738    // would mean input is not same as output.
739    outgoing_body_mode: BodyMode,
740}
741
742impl<'a> BodyReader<'a> {
743    fn new(
744        reader: LimitReader<BodySourceRef<'a>>,
745        info: &ResponseInfo,
746        incoming_body_mode: BodyMode,
747        lossy_utf8: bool,
748    ) -> BodyReader<'a> {
749        // This is outgoing body_mode in case we are using the BodyReader as a send body
750        // in a proxy situation.
751        let mut outgoing_body_mode = incoming_body_mode;
752
753        let reader = match info.content_encoding {
754            ContentEncoding::None | ContentEncoding::Unknown => ContentDecoder::PassThrough(reader),
755            #[cfg(feature = "gzip")]
756            ContentEncoding::Gzip => {
757                debug!("Decoding gzip");
758                outgoing_body_mode = BodyMode::Chunked;
759                ContentDecoder::Gzip(Box::new(gzip::GzipDecoder::new(reader)))
760            }
761            #[cfg(not(feature = "gzip"))]
762            ContentEncoding::Gzip => ContentDecoder::PassThrough(reader),
763            #[cfg(feature = "brotli")]
764            ContentEncoding::Brotli => {
765                debug!("Decoding brotli");
766                outgoing_body_mode = BodyMode::Chunked;
767                ContentDecoder::Brotli(Box::new(brotli::BrotliDecoder::new(reader)))
768            }
769            #[cfg(not(feature = "brotli"))]
770            ContentEncoding::Brotli => ContentDecoder::PassThrough(reader),
771        };
772
773        let reader = if info.is_text() {
774            charset_decoder(
775                reader,
776                info.mime_type.as_deref(),
777                info.charset.as_deref(),
778                &mut outgoing_body_mode,
779            )
780        } else {
781            CharsetDecoder::PassThrough(reader)
782        };
783
784        let reader = if info.is_text() && lossy_utf8 {
785            MaybeLossyDecoder::Lossy(LossyUtf8Reader::new(reader))
786        } else {
787            MaybeLossyDecoder::PassThrough(reader)
788        };
789
790        BodyReader {
791            outgoing_body_mode,
792            reader,
793        }
794    }
795
796    pub(crate) fn body_mode(&self) -> BodyMode {
797        self.outgoing_body_mode
798    }
799}
800
801#[allow(unused)]
802fn charset_decoder<R: io::Read>(
803    reader: R,
804    mime_type: Option<&str>,
805    charset: Option<&str>,
806    body_mode: &mut BodyMode,
807) -> CharsetDecoder<R> {
808    #[cfg(feature = "charset")]
809    {
810        use encoding_rs::{Encoding, UTF_8};
811
812        let from = charset
813            .and_then(|c| Encoding::for_label(c.as_bytes()))
814            .unwrap_or(UTF_8);
815
816        if from == UTF_8 {
817            // Do nothing
818            CharsetDecoder::PassThrough(reader)
819        } else {
820            debug!("Decoding charset {}", from.name());
821            *body_mode = BodyMode::Chunked;
822            CharsetDecoder::Decoder(self::charset::CharCodec::new(reader, from, UTF_8))
823        }
824    }
825
826    #[cfg(not(feature = "charset"))]
827    {
828        CharsetDecoder::PassThrough(reader)
829    }
830}
831
832enum MaybeLossyDecoder<R> {
833    Lossy(LossyUtf8Reader<R>),
834    PassThrough(R),
835}
836
837impl<R: io::Read> io::Read for MaybeLossyDecoder<R> {
838    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
839        match self {
840            MaybeLossyDecoder::Lossy(r) => r.read(buf),
841            MaybeLossyDecoder::PassThrough(r) => r.read(buf),
842        }
843    }
844}
845
846impl<'a> io::Read for BodyReader<'a> {
847    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
848        self.reader.read(buf)
849    }
850}
851
/// Reader layer responsible for charset conversion.
enum CharsetDecoder<R> {
    /// Converts through a `CharCodec`; only exists with the **charset** feature.
    #[cfg(feature = "charset")]
    Decoder(charset::CharCodec<R>),
    /// Reads go straight to the inner reader.
    PassThrough(R),
}
857
858impl<R: io::Read> io::Read for CharsetDecoder<R> {
859    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
860        match self {
861            #[cfg(feature = "charset")]
862            CharsetDecoder::Decoder(v) => v.read(buf),
863            CharsetDecoder::PassThrough(v) => v.read(buf),
864        }
865    }
866}
867
/// Reader layer responsible for undoing the `Content-Encoding`.
enum ContentDecoder<R: io::Read> {
    /// Gzip decoding; only exists with the **gzip** feature.
    #[cfg(feature = "gzip")]
    Gzip(Box<gzip::GzipDecoder<R>>),
    /// Brotli decoding; only exists with the **brotli** feature.
    #[cfg(feature = "brotli")]
    Brotli(Box<brotli::BrotliDecoder<R>>),
    /// Reads go straight to the inner reader.
    PassThrough(R),
}
875
876impl<R: io::Read> io::Read for ContentDecoder<R> {
877    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
878        match self {
879            #[cfg(feature = "gzip")]
880            ContentDecoder::Gzip(v) => v.read(buf),
881            #[cfg(feature = "brotli")]
882            ContentDecoder::Brotli(v) => v.read(buf),
883            ContentDecoder::PassThrough(v) => v.read(buf),
884        }
885    }
886}
887
888impl fmt::Debug for Body {
889    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
890        f.debug_struct("Body").finish()
891    }
892}
893
894impl From<&str> for ContentEncoding {
895    fn from(s: &str) -> Self {
896        match s {
897            "gzip" => ContentEncoding::Gzip,
898            "br" => ContentEncoding::Brotli,
899            _ => {
900                debug!("Unknown content-encoding: {}", s);
901                ContentEncoding::Unknown
902            }
903        }
904    }
905}
906
907impl<'a> From<&'a mut BodyDataSource> for BodySourceRef<'a> {
908    fn from(value: &'a mut BodyDataSource) -> Self {
909        match value {
910            BodyDataSource::Handler(v) => Self::HandlerShared(v),
911            BodyDataSource::Reader(v) => Self::ReaderShared(v),
912        }
913    }
914}
915
916impl From<BodyDataSource> for BodySourceRef<'static> {
917    fn from(value: BodyDataSource) -> Self {
918        match value {
919            BodyDataSource::Handler(v) => Self::HandlerOwned(v),
920            BodyDataSource::Reader(v) => Self::ReaderOwned(v),
921        }
922    }
923}
924
925pub(crate) enum BodySourceRef<'a> {
926    HandlerShared(&'a mut BodyHandler),
927    HandlerOwned(Box<BodyHandler>),
928    ReaderShared(&'a mut (dyn io::Read + Send + Sync)),
929    ReaderOwned(Box<dyn io::Read + Send + Sync>),
930}
931
932impl<'a> io::Read for BodySourceRef<'a> {
933    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
934        match self {
935            BodySourceRef::HandlerShared(v) => v.read(buf),
936            BodySourceRef::HandlerOwned(v) => v.read(buf),
937            BodySourceRef::ReaderShared(v) => v.read(buf),
938            BodySourceRef::ReaderOwned(v) => v.read(buf),
939        }
940    }
941}
942
#[cfg(all(test, feature = "_test"))]
mod test {
    use crate::test::init_test_log;
    use crate::transport::set_handler;
    use crate::Error;

    /// A `Content-Type` without parameters yields a mime type and no charset.
    #[test]
    fn content_type_without_charset() {
        init_test_log();
        let headers = [("content-type", "application/json")];
        set_handler("/get", 200, &headers, b"{}");

        let res = crate::get("https://my.test/get").call().unwrap();
        let body = res.body();
        assert_eq!(body.mime_type(), Some("application/json"));
        assert!(body.charset().is_none());
    }

    /// A `charset=` parameter is exposed separately from the mime type.
    #[test]
    fn content_type_with_charset() {
        init_test_log();
        let headers = [("content-type", "application/json; charset=iso-8859-4")];
        set_handler("/get", 200, &headers, b"{}");

        let res = crate::get("https://my.test/get").call().unwrap();
        let body = res.body();
        assert_eq!(body.mime_type(), Some("application/json"));
        assert_eq!(body.charset(), Some("iso-8859-4"));
    }

    /// A chunked body is reassembled into the original text.
    #[test]
    fn chunked_transfer() {
        init_test_log();

        let chunked = "3\r\n\
            hel\r\n\
            b\r\n\
            lo world!!!\r\n\
            0\r\n\
            \r\n";

        let headers = [("transfer-encoding", "chunked")];
        set_handler("/get", 200, &headers, chunked.as_bytes());

        let mut res = crate::get("https://my.test/get").call().unwrap();
        let text = res.body_mut().read_to_string().unwrap();
        assert_eq!(text, "hello world!!!");
    }

    /// A 64 KiB header value is rejected with `Error::LargeResponseHeader`.
    #[test]
    fn large_response_header() {
        init_test_log();
        let oversized = "b".repeat(64 * 1024);
        let headers = [("content-type", oversized.as_str())];
        set_handler("/get", 200, &headers, b"{}");

        let err = crate::get("https://my.test/get").call().unwrap_err();
        assert!(matches!(err, Error::LargeResponseHeader(_, _)));
    }
}