1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
//! Info about the HTTP transfer

use crate::WebpageOptions;

use curl::easy::Easy;
use std::io;
use std::time::Duration;

/// Result of a single HTTP transfer: metadata about the exchange together
/// with the response headers and body.
///
/// The per-field notes below describe how `HTTP::fetch` populates each value.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct HTTP {
    /// The ip address (v4 or v6) curl reports as the primary IP of the
    /// transfer; an empty string when none is reported
    pub ip: String,
    /// Duration of the HTTP call (curl's total transfer time)
    pub transfer_time: Duration,
    /// Number of redirections encountered
    pub redirect_count: u32,
    /// HTTP content type returned; an empty string when none is reported
    pub content_type: String,
    /// HTTP response code returned
    pub response_code: u32,
    /// All HTTP response headers, one trimmed line per entry.
    ///
    /// The list restarts at every `HTTP/` status line, so it begins with the
    /// status line of the last response received and holds only that
    /// response's headers.
    pub headers: Vec<String>,
    /// Effective URL that was visited; an empty string when none is reported
    pub url: String,
    /// HTTP body, decoded as UTF-8 with invalid sequences replaced by U+FFFD
    pub body: String,
}

impl HTTP {
    /// Fetch a webpage from the given URL
    ///
    /// The request is configured from `options` (TLS verification, timeout,
    /// redirect handling and user agent) and is fully performed before this
    /// function returns.
    ///
    /// Header lines and the body are decoded as UTF-8; invalid byte sequences
    /// are replaced with U+FFFD rather than causing an error.
    ///
    /// ## Errors
    /// Returns an `io::Error` when configuring the curl handle, performing
    /// the transfer, or reading the transfer information afterwards fails.
    ///
    /// ## Examples
    /// ```
    /// use webpage::HTTP;
    /// use webpage::WebpageOptions;
    ///
    /// let info = HTTP::fetch("http://example.org", WebpageOptions::default());
    /// assert!(info.is_ok());
    ///
    /// let info = HTTP::fetch("mal formed or unreachable", WebpageOptions::default());
    /// assert!(info.is_err());
    /// ```
    pub fn fetch(url: &str, options: WebpageOptions) -> Result<Self, io::Error> {
        let mut handle = Easy::new();

        // configure
        // `allow_insecure` turns off both certificate and host name checks.
        handle.ssl_verify_peer(!options.allow_insecure)?;
        handle.ssl_verify_host(!options.allow_insecure)?;
        handle.timeout(options.timeout)?;
        handle.follow_location(options.follow_location)?;
        handle.max_redirections(options.max_redirections)?;
        handle.useragent(&options.useragent)?;

        handle.url(url)?;

        let mut headers = Vec::new();
        let mut body = Vec::new();
        {
            // The transfer borrows `handle`, `headers` and `body`; this inner
            // scope ends those borrows so all three can be used again below.
            let mut transfer = handle.transfer();
            transfer.header_function(|new_data| {
                // Trim the borrowed text first and allocate only for the
                // lines that are actually kept.
                let line = String::from_utf8_lossy(new_data);
                let header = line.trim();

                // clear list on redirects: a status line starts a new
                // response, so drop what the previous one produced
                if header.starts_with("HTTP/") {
                    headers.clear();
                }

                // The blank line terminating a header section is not stored.
                if !header.is_empty() {
                    headers.push(header.to_owned());
                }

                true
            })?;

            transfer.write_function(|new_data| {
                body.extend_from_slice(new_data);
                // Report the whole chunk as consumed.
                Ok(new_data.len())
            })?;

            transfer.perform()?;
        }

        let body = String::from_utf8_lossy(&body).into_owned();

        Ok(HTTP {
            // Optional string values fall back to an empty string.
            ip: handle.primary_ip()?.unwrap_or("").to_string(),
            transfer_time: handle.total_time()?,
            redirect_count: handle.redirect_count()?,
            content_type: handle.content_type()?.unwrap_or("").to_string(),
            response_code: handle.response_code()?,
            url: handle.effective_url()?.unwrap_or("").to_string(),

            headers,
            body,
        })
    }
}