pub struct Robots { /* private fields */ }
Available on crate feature parser only.
The set of directives related to the specific user-agent in the provided robots.txt file.
§Example
User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt
§Usage
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
assert!(!r.is_relative_allowed("/invalid/path.txt"));
Implementations§
impl Robots
pub fn from_bytes(robots: &[u8], user_agent: &str) -> Self
Creates a new instance from the byte slice.
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
assert!(!r.is_relative_allowed("/invalid/path.txt"));
pub fn from_reader<R: Read>(reader: R, user_agent: &str) -> Result<Self, Error>
Creates a new instance from the generic reader.
use robotxt::Robots;
// Let's pretend it's something that actually needs a reader.
// The std::io::Read trait is implemented for &[u8].
let reader = r#"
User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_reader(reader, "foobot").unwrap();
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
assert!(!r.is_relative_allowed("/invalid/path.txt"));
pub fn from_access(access: AccessResult<'_>, user_agent: &str) -> Self
Creates a new instance from the AccessResult.
use robotxt::{AccessResult, Robots};
let r = Robots::from_access(AccessResult::Redirect, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(r.is_relative_allowed("/example/nope.txt"));
let r = Robots::from_access(AccessResult::Unavailable, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(r.is_relative_allowed("/example/nope.txt"));
let r = Robots::from_access(AccessResult::Unreachable, "foobot");
assert!(!r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
pub fn from_always(always: bool, user_agent: &str) -> Self
Creates a new instance from the global rule.
use robotxt::Robots;
let r = Robots::from_always(true, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(r.is_relative_allowed("/example/nope.txt"));
pub fn builder() -> RobotsBuilder
Available on crate feature builder only.
Creates a new builder with default settings.
See RobotsBuilder::new.
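This page carries no builder example, so here is a minimal sketch based on the crate's README; the group, allow, disallow, and crawl_delay method names are assumed from that source rather than documented here:
use robotxt::RobotsBuilder;
// Sketch only: the builder methods below are taken from the
// crate README, not from this page.
let txt = RobotsBuilder::default()
    .group(["foobot"], |u| {
        u.crawl_delay(5)
            .allow("/example/")
            .disallow("/example/nope.txt")
    })
    .to_string();
println!("{txt}");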
impl Robots
pub fn try_is_relative_allowed(&self, addr: &str) -> Option<bool>
Returns Some(true) if there is an explicit allow directive or a matching global rule, Some(false) on an explicit disallow, and None if no directive matches.
NOTE: Expects a relative path.
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
assert_eq!(r.try_is_relative_allowed("/example/yeah.txt"), Some(true));
assert_eq!(r.try_is_relative_allowed("/example/nope.txt"), Some(false));
assert_eq!(r.try_is_relative_allowed("/invalid/path.txt"), None);
pub fn is_relative_allowed(&self, addr: &str) -> bool
Returns true if the path is allowed for the user-agent.
NOTE: Expects a relative path.
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
assert!(r.is_relative_allowed("/example/yeah.txt"));
assert!(!r.is_relative_allowed("/example/nope.txt"));
assert!(!r.is_relative_allowed("/invalid/path.txt"));
pub fn try_is_absolute_allowed(&self, addr: &Url) -> Option<bool>
Returns Some(true) if there is an explicit allow directive or a matching global rule, Some(false) on an explicit disallow, and None if no directive matches.
NOTE: Ignores a different host.
use url::Url;
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
let base = Url::parse("https://example.com/").unwrap();
assert_eq!(r.try_is_absolute_allowed(&base.join("/example/yeah.txt").unwrap()), Some(true));
assert_eq!(r.try_is_absolute_allowed(&base.join("/example/nope.txt").unwrap()), Some(false));
assert_eq!(r.try_is_absolute_allowed(&base.join("/invalid/path.txt").unwrap()), None);
pub fn is_absolute_allowed(&self, addr: &Url) -> bool
Returns true if the path is allowed for the user-agent.
NOTE: Ignores a different host.
use url::Url;
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Disallow: *
Allow: /example/
Disallow: /example/nope.txt
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
let base = Url::parse("https://example.com/").unwrap();
assert!(r.is_absolute_allowed(&base.join("/example/yeah.txt").unwrap()));
assert!(!r.is_absolute_allowed(&base.join("/example/nope.txt").unwrap()));
assert!(!r.is_absolute_allowed(&base.join("/invalid/path.txt").unwrap()));
pub fn is_always(&self) -> Option<bool>
Returns Some(_) if the site is fully allowed or disallowed.
use robotxt::Robots;
let r = Robots::from_always(true, "foobot");
assert_eq!(r.is_always(), Some(true));
let r = Robots::from_always(false, "foobot");
assert_eq!(r.is_always(), Some(false));
pub fn user_agent(&self) -> &str
Returns the longest matching user-agent.
use robotxt::Robots;
let txt = r#"
User-Agent: foo
User-Agent: foobot
User-Agent: foobot-images
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot-search");
assert_eq!(r.user_agent(), "foobot");
pub fn crawl_delay(&self) -> Option<Duration>
Returns the crawl-delay of the user-agent if specified.
use std::time::Duration;
use robotxt::Robots;
let txt = r#"
User-Agent: foobot
Crawl-Delay: 5
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
assert_eq!(r.crawl_delay(), Some(Duration::from_secs(5)));
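Continuing the example above, a crawler might honor this value between requests. A minimal sketch using only the standard library; the one-second fallback is an arbitrary politeness choice, not part of the crate:
use std::thread;
// Sleep between requests; fall back to an arbitrary interval
// when the robots.txt specifies no crawl-delay.
let delay = r.crawl_delay().unwrap_or(Duration::from_secs(1));
thread::sleep(delay);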
pub fn sitemaps(&self) -> &[Url]
Returns all collected sitemaps.
use robotxt::Robots;
let txt = r#"
Sitemap: https://example.com/sitemap_1.xml
Sitemap: https://example.com/sitemap_2.xml
"#.as_bytes();
let r = Robots::from_bytes(txt, "foobot");
assert_eq!(r.sitemaps().len(), 2);
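The returned slice of Urls can be iterated directly, for example to seed a crawl queue:
// Url implements Display, so the collected sitemap locations
// can be printed or enqueued as-is.
for sitemap in r.sitemaps() {
    println!("sitemap: {sitemap}");
}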