pub struct Parser { /* private fields */ }
Implementations§
impl Parser
pub fn new(html: String) -> Self
`parse_tags`: Parses HTML content and extracts all tags of the specified type
§Arguments
- `tag` - The HTML tag name to search for (e.g., “a”, “p”, “div”)
§Returns
A vector of strings containing all matching HTML tags
§Example
let html = "<a href='https://example.com'>Link</a><p>Paragraph</p>";
let mut parser = Parser::new(html.to_string());
// Get all links
let links = parser.parse_tags("a".to_string());
assert_eq!(links, vec!["<a href='https://example.com'>Link</a>"]);
// Get all paragraphs
let paragraphs = parser.parse_tags("p".to_string());
assert_eq!(paragraphs, vec!["<p>Paragraph</p>"]);
`parse_tags_with_attr`: Filters HTML tags by attribute name and optionally by attribute value
§Arguments
- `tag` - The HTML tag name to search for (e.g., “a”, “div”, “img”)
- `attr_name` - The attribute name to filter by (e.g., “href”, “class”, “id”)
- `attr_value` - Optional attribute value to filter by
  - If `None`, returns all tags with the specified attribute regardless of value
  - If `Some(value)`, returns only tags where the attribute exactly matches the value
§Returns
A vector of strings containing the matching HTML tags
§Examples
let html = r#"
<a href="https://github.com">GitHub</a>
<a href="https://rust-lang.org" class="official">Rust</a>
<a class="social" href="https://twitter.com">Twitter</a>
"#;
let mut parser = Parser::new(html.to_string());
// Example 1: Find all links with href attribute (any value)
let links_with_href = parser.parse_tags_with_attr("a".to_string(), "href", None);
// Returns all three links
// Example 2: Find links with class="social"
let social_links = parser.parse_tags_with_attr("a".to_string(), "class", Some("social"));
// Returns only: <a class="social" href="https://twitter.com">Twitter</a>
// Example 3: Find links to a specific URL
let github_links = parser.parse_tags_with_attr("a".to_string(), "href", Some("https://github.com"));
// Returns only: <a href="https://github.com">GitHub</a>
§Command Line Usage
When using the CLI tool, you can filter by attributes like this:
# Find all links with href attribute
tagparser "<html>...</html>" "a" "href"
# Find all links with href pointing to github.com
tagparser "<html>...</html>" "a" "href" "https://github.com"
pub fn extract_tag_content(&mut self, tag: String) -> Vec<String>
Extracts the content (text) from inside HTML tags of the specified type
This method returns only the content between the opening and closing tags, without the enclosing tags themselves or their attributes. Any HTML nested inside the element is returned unchanged as part of the content.
§Arguments
- `tag` - The HTML tag name to search for (e.g., “a”, “p”, “div”)
§Returns
A vector of strings containing the text content of all matching tags
§Examples
let html = r#"
<a href="https://github.com">GitHub</a>
<p>This is a <strong>paragraph</strong> with some text.</p>
<div class="container">Some content</div>
"#;
let mut parser = Parser::new(html.to_string());
// Extract content from links
let link_texts = parser.extract_tag_content("a".to_string());
assert_eq!(link_texts, vec!["GitHub"]);
// Extract content from paragraphs (includes nested HTML)
let paragraph_texts = parser.extract_tag_content("p".to_string());
assert_eq!(paragraph_texts, vec!["This is a <strong>paragraph</strong> with some text."]);
// Extract content from divs
let div_texts = parser.extract_tag_content("div".to_string());
assert_eq!(div_texts, vec!["Some content"]);
pub fn extract_attribute_values(
    &mut self,
    tag: String,
    attr_name: &str,
) -> Vec<String>
Extracts attribute values from HTML tags of the specified type
This method returns the values of the specified attribute from all matching tags.
§Arguments
- `tag` - The HTML tag name to search for (e.g., “a”, “img”, “div”)
- `attr_name` - The attribute name to extract values from (e.g., “href”, “src”, “class”)
§Returns
A vector of strings containing the attribute values from all matching tags. Returns an empty vector if no matching tags or attributes are found.
§Examples
let html = r#"
<a href="https://github.com">GitHub</a>
<a href="https://rust-lang.org" class="official">Rust</a>
<a class="social" href="https://twitter.com">Twitter</a>
"#;
let mut parser = Parser::new(html.to_string());
// Extract all href values from links
let hrefs = parser.extract_attribute_values("a".to_string(), "href");
assert_eq!(
vec!["https://github.com", "https://rust-lang.org", "https://twitter.com"],
hrefs
);
// Extract all class values from links
let classes = parser.extract_attribute_values("a".to_string(), "class");
assert_eq!(
vec!["official", "social"],
classes
);