pub struct Robots { /* private fields */ }
The Robots struct represents the set of directives applicable to the specified user-agent in the provided robots.txt file.
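A typical lifecycle, sketched below with a hypothetical fetch_robots_txt helper standing in for real network I/O: retrieve the robots.txt body once per host, build a Robots for your crawler's user-agent, and reuse it for every candidate path.
use robotxt::Robots;

// Hypothetical helper standing in for real network I/O; the rules
// returned here are a hard-coded stand-in body.
fn fetch_robots_txt(_host: &str) -> Vec<u8> {
    b"User-Agent: foobot\nDisallow: /private/".to_vec()
}

let body = fetch_robots_txt("example.com");
// Build once per host, then reuse for every candidate path.
let robots = Robots::from_bytes(&body, "foobot");
assert!(robots.is_allowed("/public/page.html"));
assert!(!robots.is_allowed("/private/page.html"));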
Implementations
impl Robots
pub fn from_bytes(robots: &[u8], user_agent: &str) -> Self
Creates a new Robots from the byte slice.
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
assert!(r.is_allowed("/example/yeah.txt"));
assert!(!r.is_allowed("/example/nope.txt"));
assert!(!r.is_allowed("/invalid/path.txt"));
pub fn from_reader<R: Read>(reader: R, user_agent: &str) -> Result<Self, IoError>
Creates a new Robots from the generic reader.
use robotxt::Robots;
// Let's pretend it's something that actually needs a reader.
// The std::io::Read trait is implemented for &[u8].
let reader = r#"
User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_reader(reader, "foobot").unwrap();
assert!(r.is_allowed("/example/yeah.txt"));
assert!(!r.is_allowed("/example/nope.txt"));
assert!(!r.is_allowed("/invalid/path.txt"));
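Since the reader is generic over std::io::Read, the same constructor also covers files. A minimal sketch, assuming a robots.txt file exists in the current directory:
use std::fs::File;
use robotxt::Robots;

// Hand the File directly to from_reader; any I/O failure while
// reading surfaces as the constructor's IoError.
let file = File::open("robots.txt").expect("robots.txt should exist");
let r = Robots::from_reader(file, "foobot").expect("readable robots.txt");
println!("allowed: {}", r.is_allowed("/example/yeah.txt"));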
pub fn from_access(access: AccessResult<'_>, user_agent: &str) -> Self
Creates a new Robots from the AccessResult.
use robotxt::{AccessResult, Robots};
let r = Robots::from_access(AccessResult::Redirect, "foobot");
assert!(r.is_allowed("/example/yeah.txt"));
assert!(r.is_allowed("/example/nope.txt"));
let r = Robots::from_access(AccessResult::Unavailable, "foobot");
assert!(r.is_allowed("/example/yeah.txt"));
assert!(r.is_allowed("/example/nope.txt"));
let r = Robots::from_access(AccessResult::Unreachable, "foobot");
assert!(!r.is_allowed("/example/yeah.txt"));
assert!(!r.is_allowed("/example/nope.txt"));
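In a crawler, the AccessResult variant is typically derived from the outcome of fetching robots.txt itself. A sketch, assuming the Successful variant carries the response body as a byte slice and using a stand-in status code; the ranges follow the usual RFC 9309 reading, where an unavailable file (e.g. 4xx) permits everything and an unreachable host (e.g. 5xx) permits nothing:
use robotxt::{AccessResult, Robots};

let status: u16 = 503; // stand-in for a real HTTP response status
let body: &[u8] = b""; // stand-in for a real HTTP response body
let access = match status {
    200..=299 => AccessResult::Successful(body),
    400..=499 => AccessResult::Unavailable, // treated as allow-all
    _ => AccessResult::Unreachable,         // treated as disallow-all
};
let r = Robots::from_access(access, "foobot");
assert!(!r.is_allowed("/example/yeah.txt")); // 503 => nothing is allowed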
pub fn from_always(always: bool, user_agent: &str) -> Self
Creates a new Robots from the global rule: every path is either always allowed or always disallowed for the given user-agent.
use robotxt::Robots;
let r = Robots::from_always(true, "foobot");
assert!(r.is_allowed("/example/yeah.txt"));
assert!(r.is_allowed("/example/nope.txt"));
impl Robots
pub fn is_allowed(&self, path: &str) -> bool
Returns true if the path is allowed for the user-agent. NOTE: this method expects a relative path.
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
assert!(r.is_allowed("/example/yeah.txt"));
assert!(!r.is_allowed("/example/nope.txt"));
assert!(!r.is_allowed("/invalid/path.txt"));
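Because is_allowed expects a relative path, absolute URLs need their origin stripped first. A naive string split is shown below for illustration only; a real crawler would use a proper URL parser:
use robotxt::Robots;

let absolute = "https://example.com/example/nope.txt";
// splitn yields ["https:", "", "example.com", "example/nope.txt"];
// re-prefix the remainder with '/' to recover the relative path.
let path = absolute
    .splitn(4, '/')
    .nth(3)
    .map(|rest| format!("/{rest}"))
    .unwrap_or_else(|| "/".to_string());

let txt = b"User-Agent: foobot\nDisallow: /example/nope.txt";
let r = Robots::from_bytes(txt, "foobot");
assert!(!r.is_allowed(&path));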
pub fn is_always(&self) -> Option<bool>
Returns Some(_) if the site is fully allowed or disallowed.
use robotxt::Robots;
let r = Robots::from_always(true, "foobot");
assert_eq!(r.is_always(), Some(true));
let r = Robots::from_always(false, "foobot");
assert_eq!(r.is_always(), Some(false));
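A sketch of how a crawler might use this as a fast path, skipping per-path checks whenever the whole site is uniformly allowed or disallowed:
use robotxt::Robots;

let r = Robots::from_always(false, "foobot");
match r.is_always() {
    Some(false) => println!("skip this host entirely"),
    Some(true) => println!("no filtering needed"),
    None => println!("check each path with is_allowed"),
}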
pub fn user_agent(&self) -> &str
Returns the longest matching user-agent.
use robotxt::Robots;
let txt = r#"
User-Agent: foo
User-Agent: foobot
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot-search");
assert_eq!(r.user_agent(), "foobot");
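A Robots instance is bound to a single user-agent at construction time, so a crawler operating several bots builds one instance per product token. A minimal sketch with two hypothetical bots:
use robotxt::Robots;

let txt = b"User-Agent: foobot\nDisallow: /nope/\n\nUser-Agent: barbot\nDisallow: /";
let foo = Robots::from_bytes(txt, "foobot");
let bar = Robots::from_bytes(txt, "barbot");
assert_eq!(foo.user_agent(), "foobot");
assert_eq!(bar.user_agent(), "barbot");
assert!(foo.is_allowed("/ok.html"));
assert!(!bar.is_allowed("/ok.html"));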
pub fn crawl_delay(&self) -> Option<Duration>
Returns the crawl-delay of the user-agent if specified.
use std::time::Duration;
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Crawl-Delay: 5
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
assert_eq!(r.crawl_delay(), Some(Duration::from_secs(5)));
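A minimal politeness sketch built on top of this: sleep between requests when the site specifies a crawl-delay, falling back to a default of our own choosing otherwise:
use std::{thread, time::Duration};
use robotxt::Robots;

let txt = b"User-Agent: foobot\nCrawl-Delay: 5";
let r = Robots::from_bytes(txt, "foobot");
let delay = r.crawl_delay().unwrap_or(Duration::from_millis(500));
for path in ["/a.html", "/b.html"] {
    if r.is_allowed(path) {
        // the actual fetch would happen here
        thread::sleep(delay);
    }
}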