pub struct RobotsTxt<'a> {
pub groups: Vec<Group<'a>>,
pub extensions: Extensions<'a>,
}
Parsed robots.txt data.
Values inside this type borrow from the original input. Use
RobotsTxt::is_allowed for access checks and inspect RobotsTxt::groups
when you need the parsed rule structure.
§Examples
use fast_robots::{RobotsTxt, RuleKind};
let robots = RobotsTxt::parse("User-agent: *\nDisallow: /admin\n");
assert_eq!(robots.groups[0].agents, ["*"]);
assert_eq!(robots.groups[0].rules[0].kind, RuleKind::Disallow);
assert_eq!(robots.groups[0].rules[0].pattern, "/admin");

§Fields
§groups: Vec<Group<'a>>

Standard access-control groups in source order.
§extensions: Extensions<'a>

Available on crate feature extensions only. Non-core metadata collected when the extensions feature is enabled.
§Implementations

impl<'a> RobotsTxt<'a>
pub fn parse(input: &'a str) -> Self
Parses a UTF-8 robots.txt string into access rules.
This is tolerant and infallible: malformed lines are ignored where the
parser can recover. Use RobotsTxt::parse_with_diagnostics to collect
warnings, or RobotsTxt::parse_with_options to enforce a size limit.
§Examples
use fast_robots::RobotsTxt;
let robots = RobotsTxt::parse("User-agent: *\nDisallow: /private\n");
assert!(!robots.is_allowed("ExampleBot", "/private/file.html"));
assert!(robots.is_allowed("ExampleBot", "/public/file.html"));

pub fn parse_bytes(input: &'a [u8]) -> Result<Self, ParseError>
Parses UTF-8 bytes into access rules using ParseOptions::default.
Returns ParseError::Utf8 for invalid UTF-8 and
ParseError::TooLarge when the input is larger than
DEFAULT_MAX_BYTES.
§Examples
use fast_robots::RobotsTxt;
let robots = RobotsTxt::parse_bytes(b"User-agent: *\nDisallow: /tmp\n")?;
assert!(!robots.is_allowed("ExampleBot", "/tmp/cache"));

pub fn parse_bytes_with_options(
    input: &'a [u8],
    options: ParseOptions,
) -> Result<Self, ParseError>
Parses UTF-8 bytes into access rules with explicit options.
Use this when reading raw bytes and you need a custom size limit.
§Examples
use fast_robots::{ParseOptions, RobotsTxt};
let robots = RobotsTxt::parse_bytes_with_options(
b"User-agent: *\nDisallow: /cache\n",
ParseOptions { max_bytes: Some(1024) },
)?;
assert!(!robots.is_allowed("ExampleBot", "/cache/file"));

pub fn parse_with_options(
    input: &'a str,
    options: ParseOptions,
) -> Result<Self, ParseError>
Parses a UTF-8 string into access rules with explicit options.
This is useful when the input is already a str but should still be
checked against a maximum size.
§Examples
use fast_robots::{ParseOptions, RobotsTxt};
let robots = RobotsTxt::parse_with_options(
"User-agent: *\nDisallow: /private\n",
ParseOptions { max_bytes: Some(1024) },
)?;
assert!(!robots.is_allowed("ExampleBot", "/private"));

pub fn parse_with_diagnostics(input: &'a str) -> ParseReport<'a>
Parses a UTF-8 string and records recoverable syntax warnings.
Diagnostics do not change parser recovery behavior; they only expose the issues that tolerant parsing skipped.
§Examples
use fast_robots::{ParseWarningKind, RobotsTxt};
let report = RobotsTxt::parse_with_diagnostics(
"Disallow: /\nMissing separator\nUser-agent: *\nDisallow: /private\n",
);
assert_eq!(report.warnings.len(), 2);
assert!(matches!(
report.warnings[0].kind,
ParseWarningKind::RuleBeforeUserAgent { .. }
));
assert!(!report.robots.is_allowed("ExampleBot", "/private"));

pub fn parse_with_diagnostics_options(
    input: &'a str,
    options: ParseOptions,
) -> Result<ParseReport<'a>, ParseError>
Parses a UTF-8 string with diagnostics and explicit options.
§Examples
use fast_robots::{ParseOptions, RobotsTxt};
let report = RobotsTxt::parse_with_diagnostics_options(
"User-agent: *\nDisallow: /private\n",
ParseOptions { max_bytes: Some(1024) },
)?;
assert!(report.warnings.is_empty());
assert!(!report.robots.is_allowed("ExampleBot", "/private"));

pub fn parse_bytes_with_diagnostics(
    input: &'a [u8],
) -> Result<ParseReport<'a>, ParseError>
Parses UTF-8 bytes and records recoverable syntax warnings.
Uses ParseOptions::default for size checking.
§Examples
use fast_robots::RobotsTxt;
let report = RobotsTxt::parse_bytes_with_diagnostics(
b"User-agent: *\nDisallow: /private\n",
)?;
assert!(report.warnings.is_empty());
assert!(!report.robots.is_allowed("ExampleBot", "/private"));

pub fn parse_bytes_with_diagnostics_options(
    input: &'a [u8],
    options: ParseOptions,
) -> Result<ParseReport<'a>, ParseError>
Parses UTF-8 bytes with diagnostics and explicit options.
§Examples
use fast_robots::{ParseOptions, RobotsTxt};
let report = RobotsTxt::parse_bytes_with_diagnostics_options(
b"User-agent: *\nDisallow: /private\n",
ParseOptions { max_bytes: Some(1024) },
)?;
assert!(report.warnings.is_empty());

pub fn matcher(&'a self) -> RobotsMatcher<'a>
Builds an indexed matcher for repeated access checks.
The returned matcher borrows this parsed file, indexes user-agent groups,
and precomputes rule metadata. Use it when checking many URLs against the
same robots.txt; for one-off checks, RobotsTxt::is_allowed avoids
the upfront allocation cost.
§Examples
use fast_robots::RobotsTxt;
let robots = RobotsTxt::parse("User-agent: *\nDisallow: /private\n");
let matcher = robots.matcher();
assert!(!matcher.is_allowed("ExampleBot", "/private/file"));
assert!(matcher.is_allowed("ExampleBot", "/public/file"));

pub fn is_allowed(&self, user_agent: &str, path: &str) -> bool
Returns whether user_agent may crawl path.
The matcher implements the core RFC 9309 access semantics used by this
crate: exact user-agent groups are considered before the * fallback,
matching exact groups are merged, the longest matching pattern wins, and
Allow wins ties. /robots.txt is always allowed.
path should be the URL path and optional query string, not a full URL.
§Examples
use fast_robots::RobotsTxt;
let robots = RobotsTxt::parse(
"User-agent: *\n\
Disallow: /private\n\
Allow: /private/public\n",
);
assert!(!robots.is_allowed("ExampleBot", "/private/file"));
assert!(robots.is_allowed("ExampleBot", "/private/public/file"));
assert!(robots.is_allowed("ExampleBot", "/robots.txt"));