pub struct RobotsTxt { /* private fields */ }
The main structure representing a parsed robots.txt file.
§Implementations
impl RobotsTxt
pub fn parse_with_domain(content: &str, domain: Option<String>) -> Self
Parse a robots.txt file from a string with a specified domain.
§Arguments
content - The robots.txt file content as a string
domain - Optional domain name to associate with this robots.txt
§Example
use robotstxt_rs::RobotsTxt;
let content = "User-agent: *\nDisallow: /admin/";
let robots = RobotsTxt::parse_with_domain(content, Some("example.com".to_string()));
pub async fn from_url(url: &str) -> Result<Self, Box<dyn Error>>
Fetch and parse a robots.txt file from a URL (requires the async feature).
§Arguments
url - The URL to the robots.txt file
§Example
use robotstxt_rs::RobotsTxt;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let robots = RobotsTxt::from_url("https://example.com/robots.txt").await?;
Ok(())
}
pub fn can_fetch(&self, user_agent: &str, path: &str) -> bool
Check if a user-agent is allowed to fetch a specific path.
§Arguments
user_agent - The user-agent string (e.g., “Googlebot”)
path - The path to check (e.g., “/admin/panel”)
§Returns
Returns true if the user-agent is allowed to fetch the path, false otherwise.
§Example
use robotstxt_rs::RobotsTxt;
let content = "User-agent: *\nDisallow: /admin/";
let robots = RobotsTxt::parse(content);
assert!(!robots.can_fetch("Googlebot", "/admin/panel"));
assert!(robots.can_fetch("Googlebot", "/public/page"));
pub fn get_domain(&self) -> Option<&str>
Get the domain associated with this robots.txt file.
§Returns
Returns Some(&str) if a domain was specified, None otherwise.
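§Example
A minimal sketch, assuming the domain passed to parse_with_domain is stored as given:
use robotstxt_rs::RobotsTxt;
let content = "User-agent: *\nDisallow: /admin/";
let robots = RobotsTxt::parse_with_domain(content, Some("example.com".to_string()));
// Assumes the domain is returned exactly as it was passed in.
assert_eq!(robots.get_domain(), Some("example.com"));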
pub fn get_sitemaps(&self) -> &[String]
Get all sitemap URLs listed in the robots.txt file.
§Returns
Returns a slice of sitemap URL strings.
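§Example
A minimal sketch, assuming Sitemap: directives are collected as written in the file:
use robotstxt_rs::RobotsTxt;
let content = "User-agent: *\nDisallow: /admin/\nSitemap: https://example.com/sitemap.xml";
let robots = RobotsTxt::parse(content);
let sitemaps = robots.get_sitemaps();
// Assumes one entry per Sitemap: line, stored verbatim.
assert_eq!(sitemaps.len(), 1);
assert_eq!(sitemaps[0], "https://example.com/sitemap.xml");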
pub fn get_comments(&self) -> &[String]
Get all comments from the robots.txt file.
§Returns
Returns a slice of comment strings (without the # prefix).
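§Example
A minimal sketch, assuming # comment lines are collected during parsing:
use robotstxt_rs::RobotsTxt;
let content = "# Crawl policy\nUser-agent: *\nDisallow: /admin/";
let robots = RobotsTxt::parse(content);
// Comments are returned without the leading #.
for comment in robots.get_comments() {
    println!("comment: {}", comment);
}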
pub fn get_rules(&self) -> &HashMap<String, RobotRule>
Get all rules for all user-agents.
§Returns
Returns a HashMap where keys are user-agent strings and values are RobotRule structs.
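§Example
A minimal sketch iterating over every per-user-agent rule; the disallowed field name is taken from the get_rule example below:
use robotstxt_rs::RobotsTxt;
let content = "User-agent: *\nDisallow: /admin/\nUser-agent: Googlebot\nDisallow: /private/";
let robots = RobotsTxt::parse(content);
// Keys are user-agent strings, values are the corresponding RobotRule structs.
for (user_agent, rule) in robots.get_rules() {
    println!("{}: {:?}", user_agent, rule.disallowed);
}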
pub fn get_rule(&self, user_agent: &str) -> Option<&RobotRule>
Get the rule for a specific user-agent.
§Arguments
user_agent - The user-agent string to look up
§Returns
Returns Some(&RobotRule) if rules exist for this user-agent or for the wildcard (*), None otherwise.
§Example
use robotstxt_rs::RobotsTxt;
let content = "User-agent: Googlebot\nDisallow: /private/";
let robots = RobotsTxt::parse(content);
if let Some(rule) = robots.get_rule("Googlebot") {
println!("Disallowed paths: {:?}", rule.disallowed);
}
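The documented wildcard fallback can be checked directly; a minimal sketch, assuming a lookup for an unlisted agent falls back to the * group:
use robotstxt_rs::RobotsTxt;
let content = "User-agent: *\nDisallow: /tmp/";
let robots = RobotsTxt::parse(content);
// "Bingbot" has no dedicated group, so the lookup should return the * rules.
assert!(robots.get_rule("Bingbot").is_some());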