Crate webpage

Expand description

A small library to fetch info about a web page: title, description, language, HTTP info, RSS feeds, OpenGraph, Schema.org, and more.

Usage

use webpage::{Webpage, WebpageOptions};

let info = Webpage::from_url("http://example.org", WebpageOptions::default())
    .expect("Could not read from URL");

// the HTTP transfer info
let http = info.http;

// assert_eq!(http.ip, "54.192.129.71".to_string());
assert!(http.headers[0].starts_with("HTTP"));
assert!(http.body.starts_with("<!doctype html>"));
assert_eq!(http.url, "http://example.org".to_string()); // effective url
assert_eq!(http.content_type, "text/html; charset=UTF-8".to_string());

// the parsed HTML info
let html = info.html;

assert_eq!(html.title, Some("Example Domain".to_string()));
assert_eq!(html.description, None);
assert_eq!(html.opengraph.og_type, "website".to_string());

You can also get HTML info about local data:

use webpage::HTML;
let html = HTML::from_file("index.html", None);
// or let html = HTML::from_string(input, None);

Options

The following configuration options are available:

pub struct WebpageOptions {
    allow_insecure: bool,
    follow_location: bool,
    max_redirections: u32,
    timeout: std::time::Duration,
    useragent: String,
}

To override individual options, start from the defaults:

use webpage::{Webpage, WebpageOptions};

let options = WebpageOptions { allow_insecure: true, ..Default::default() };
let info = Webpage::from_url("https://example.org", options).expect("Halp, could not fetch");

Re-exports

pub use crate::html::HTML;
pub use crate::http::HTTP;
pub use crate::opengraph::Opengraph;
pub use crate::opengraph::OpengraphObject;
pub use crate::schema_org::SchemaOrg;

Modules

html

Info from the parsed HTML document

http

Info about the HTTP transfer

opengraph

OpenGraph information

schema_org

Schema.org information

Structs

Webpage

Resulting info for a webpage

WebpageOptions

Configuration options