Struct robotxt::Robots

source ·
pub struct Robots { /* private fields */ }
Available on crate feature parser only.
Expand description

The set of directives related to the specific user-agent in the provided robots.txt file.

§Example

User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt

§Usage

use robotxt::Robots;

let txt = "...".as_bytes(); // placeholder: use the robots.txt content from the example above
let r = Robots::from_bytes(txt, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
assert!(!r.is_relative_allowed("/invalid/path.txt"));

Implementations§

source§

impl Robots

source

pub fn from_bytes(robots: &[u8], user_agent: &str) -> Self

Creates a new instance from the byte slice.

use robotxt::Robots;

let txt = r#"
    User-Agent: foobot
    Disallow: *
    Allow: /example/
    Disallow: /example/nope.txt
"#.as_bytes();

let r = Robots::from_bytes(txt, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
assert!(!r.is_relative_allowed("/invalid/path.txt"));
source

pub fn from_reader<R: Read>(reader: R, user_agent: &str) -> Result<Self, Error>

Creates a new instance from the generic reader.

use robotxt::Robots;

// Let's pretend it's something that actually needs a reader.
// The std::io::Read trait is implemented for &[u8].
let reader = r#"
    User-Agent: foobot
    Disallow: *
    Allow: /example/
    Disallow: /example/nope.txt
"#.as_bytes();

let r = Robots::from_reader(reader, "foobot").unwrap();
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
assert!(!r.is_relative_allowed("/invalid/path.txt"));
source

pub fn from_access(access: AccessResult<'_>, user_agent: &str) -> Self

Creates a new instance from the AccessResult.

use robotxt::{AccessResult, Robots};

let r = Robots::from_access(AccessResult::Redirect, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(r.is_relative_allowed("/example/nope.txt"));

let r = Robots::from_access(AccessResult::Unavailable, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(r.is_relative_allowed("/example/nope.txt"));

let r = Robots::from_access(AccessResult::Unreachable, "foobot");
assert!(!r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
source

pub fn from_always(always: bool, user_agent: &str) -> Self

Creates a new instance from the global rule.

use robotxt::Robots;

let r = Robots::from_always(true, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(r.is_relative_allowed("/example/nope.txt"));
source

pub fn builder() -> RobotsBuilder

Available on crate feature builder only.

Creates a new builder with default settings. See RobotsBuilder::new.

source§

impl Robots

source

pub fn try_is_relative_allowed(&self, addr: &str) -> Option<bool>

Returns Some(true) if there is an explicit allow or the global rule, Some(false) if the path is explicitly disallowed, and None if no rule matches the path. NOTE: Expects relative path.

use robotxt::Robots;

let txt = r#"
    User-Agent: foobot
    Allow: /example/
    Disallow: /example/nope.txt
"#.as_bytes();

let r = Robots::from_bytes(txt, "foobot");
assert_eq!(r.try_is_relative_allowed("/example/yeah.txt"), Some(true));
assert_eq!(r.try_is_relative_allowed("/example/nope.txt"), Some(false));
assert_eq!(r.try_is_relative_allowed("/invalid/path.txt"), None);
source

pub fn is_relative_allowed(&self, addr: &str) -> bool

Returns true if the path is allowed for the user-agent. NOTE: Expects relative path.

use robotxt::Robots;

let txt = r#"
    User-Agent: foobot
    Disallow: *
    Allow: /example/
    Disallow: /example/nope.txt
"#.as_bytes();

let r = Robots::from_bytes(txt, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
assert!(!r.is_relative_allowed("/invalid/path.txt"));
source

pub fn try_is_absolute_allowed(&self, addr: &Url) -> Option<bool>

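Returns Some(true) if there is an explicit allow or the global rule, Some(false) if the path is explicitly disallowed, and None if no rule matches the path. NOTE: Expects an absolute URL; ignores different host.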

use url::Url;
use robotxt::Robots;

let txt = r#"
    User-Agent: foobot
    Allow: /example/
    Disallow: /example/nope.txt
"#.as_bytes();

let r = Robots::from_bytes(txt, "foobot");
let base = Url::parse("https://example.com/").unwrap();
assert_eq!(r.try_is_absolute_allowed(&base.join("/example/yeah.txt").unwrap()), Some(true));
assert_eq!(r.try_is_absolute_allowed(&base.join("/example/nope.txt").unwrap()), Some(false));
assert_eq!(r.try_is_absolute_allowed(&base.join("/invalid/path.txt").unwrap()), None);
source

pub fn is_absolute_allowed(&self, addr: &Url) -> bool

Returns true if the path is allowed for the user-agent. NOTE: Ignores different host.

use url::Url;
use robotxt::Robots;

let txt = r#"
    User-Agent: foobot
    Disallow: *
    Allow: /example/
    Disallow: /example/nope.txt
"#.as_bytes();

let r = Robots::from_bytes(txt, "foobot");
let base = Url::parse("https://example.com/").unwrap();
assert!(r.is_absolute_allowed(&base.join("/example/yeah.txt").unwrap()));
assert!(!r.is_absolute_allowed(&base.join("/example/nope.txt").unwrap()));
assert!(!r.is_absolute_allowed(&base.join("/invalid/path.txt").unwrap()));
source

pub fn is_always(&self) -> Option<bool>

Returns Some(_) if the site is fully allowed or disallowed.

use robotxt::Robots;

let r = Robots::from_always(true, "foobot");
assert_eq!(r.is_always(), Some(true));

let r = Robots::from_always(false, "foobot");
assert_eq!(r.is_always(), Some(false));
source

pub fn user_agent(&self) -> &str

Returns the longest matching user-agent.

use robotxt::Robots;

let txt = r#"
    User-Agent: foo
    User-Agent: foobot
    User-Agent: foobot-images
"#.as_bytes();

let r = Robots::from_bytes(txt, "foobot-search");
assert_eq!(r.user_agent(), "foobot");
source

pub fn crawl_delay(&self) -> Option<Duration>

Returns the crawl-delay of the user-agent if specified.

use std::time::Duration;
use robotxt::Robots;

let txt = r#"
    User-Agent: foobot
    Crawl-Delay: 5
"#.as_bytes();

let r = Robots::from_bytes(txt, "foobot");
assert_eq!(r.crawl_delay(), Some(Duration::from_secs(5)));
source

pub fn sitemaps(&self) -> &[Url]

Returns all collected sitemaps.

use robotxt::Robots;

let txt = r#"
    Sitemap: https://example.com/sitemap_1.xml
    Sitemap: https://example.com/sitemap_2.xml
"#.as_bytes();

let r = Robots::from_bytes(txt, "foobot");
assert_eq!(r.sitemaps().len(), 2);
source

pub fn len(&self) -> Option<usize>

Returns the total number of applied rules, or None if constructed with (or optimized to) the global rule.

source

pub fn is_empty(&self) -> Option<bool>

Returns true if there are no applied rules, i.e., it is constructed with (or optimized to) the global rule.

Trait Implementations§

source§

impl Clone for Robots

source§

fn clone(&self) -> Robots

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for Robots

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl<'de> Deserialize<'de> for Robots

source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
source§

impl PartialEq for Robots

source§

fn eq(&self, other: &Robots) -> bool

This method tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
source§

impl Serialize for Robots

source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more
source§

impl Eq for Robots

source§

impl StructuralPartialEq for Robots

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,