RobotsTxt

Struct RobotsTxt 

Source
pub struct RobotsTxt { /* private fields */ }
Expand description

The main structure representing a parsed robots.txt file.

Implementations§

Source§

impl RobotsTxt

Source

pub fn parse(content: &str) -> Self

Parse a robots.txt file from a string.

§Arguments
  • content - The robots.txt file content as a string
§Example
use robotstxt_rs::RobotsTxt;

let content = "User-agent: *\nDisallow: /admin/";
let robots = RobotsTxt::parse(content);
Source

pub fn parse_with_domain(content: &str, domain: Option<String>) -> Self

Parse a robots.txt file from a string with a specified domain.

§Arguments
  • content - The robots.txt file content as a string
  • domain - Optional domain name to associate with this robots.txt
§Example
use robotstxt_rs::RobotsTxt;

let content = "User-agent: *\nDisallow: /admin/";
let robots = RobotsTxt::parse_with_domain(content, Some("example.com".to_string()));
Source

pub async fn from_url(url: &str) -> Result<Self, Box<dyn Error>>

Fetch and parse a robots.txt file from a URL (requires the async feature).

§Arguments
  • url - The URL to the robots.txt file
§Example
use robotstxt_rs::RobotsTxt;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let robots = RobotsTxt::from_url("https://example.com/robots.txt").await?;
    Ok(())
}
Source

pub fn can_fetch(&self, user_agent: &str, path: &str) -> bool

Check if a user-agent is allowed to fetch a specific path.

§Arguments
  • user_agent - The user-agent string (e.g., “Googlebot”)
  • path - The path to check (e.g., “/admin/panel”)
§Returns

Returns true if the user-agent is allowed to fetch the path, false otherwise.

§Example
use robotstxt_rs::RobotsTxt;

let content = "User-agent: *\nDisallow: /admin/";
let robots = RobotsTxt::parse(content);
assert!(!robots.can_fetch("Googlebot", "/admin/panel"));
assert!(robots.can_fetch("Googlebot", "/public/page"));
Source

pub fn get_domain(&self) -> Option<&str>

Get the domain associated with this robots.txt file.

§Returns

Returns Some(&str) if a domain was specified, None otherwise.

Source

pub fn get_sitemaps(&self) -> &[String]

Get all sitemap URLs from the robots.txt file.

§Returns

Returns a slice of sitemap URLs.

Source

pub fn get_comments(&self) -> &[String]

Get all comments from the robots.txt file.

§Returns

Returns a slice of comment strings (without the # prefix).

Source

pub fn get_rules(&self) -> &HashMap<String, RobotRule>

Get all rules for all user-agents.

§Returns

Returns a HashMap where keys are user-agent strings and values are RobotRule structs.

Source

pub fn get_rule(&self, user_agent: &str) -> Option<&RobotRule>

Get the rule that applies to a specific user-agent (its own rule or the wildcard * rule).

§Arguments
  • user_agent - The user-agent string to look up
§Returns

Returns Some(&RobotRule) if rules exist for this user-agent or the wildcard (*), None otherwise.

§Example
use robotstxt_rs::RobotsTxt;

let content = "User-agent: Googlebot\nDisallow: /private/";
let robots = RobotsTxt::parse(content);
if let Some(rule) = robots.get_rule("Googlebot") {
    println!("Disallowed paths: {:?}", rule.disallowed);
}

Trait Implementations§

Source§

impl Debug for RobotsTxt

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> ErasedDestructor for T
where T: 'static,