Struct Parser

Source
pub struct Parser { /* private fields */ }

Implementations§

Source§

impl Parser

Source

pub fn new(html: String) -> Self

Source

pub fn parse_tags(&mut self, tag: String) -> Vec<String>

Parses HTML content and extracts all tags of the specified type.

§Arguments
  • tag - The HTML tag name to search for (e.g., “a”, “p”, “div”)
§Returns

A vector of strings containing all matching HTML tags.

§Example
let html = "<a href='https://example.com'>Link</a><p>Paragraph</p>";
let mut parser = Parser::new(html.to_string());
 
// Get all links
let links = parser.parse_tags("a".to_string());
assert_eq!(links, vec!["<a href='https://example.com'>Link</a>"]);
 
// Get all paragraphs
let paragraphs = parser.parse_tags("p".to_string());
assert_eq!(paragraphs, vec!["<p>Paragraph</p>"]);
Source

pub fn parse_tags_with_attr( &mut self, tag: String, attr_name: &str, attr_value: Option<&str>, ) -> Vec<String>

Filters HTML tags by attribute name and, optionally, by attribute value.

§Arguments
  • tag - The HTML tag name to search for (e.g., “a”, “div”, “img”)
  • attr_name - The attribute name to filter by (e.g., “href”, “class”, “id”)
  • attr_value - Optional attribute value to filter by
    • If None, returns all tags with the specified attribute regardless of value
    • If Some(value), returns only tags where the attribute exactly matches the value
§Returns

A vector of strings containing the matching HTML tags.

§Examples
let html = r#"
    <a href="https://github.com">GitHub</a>
    <a href="https://rust-lang.org" class="official">Rust</a>
    <a class="social" href="https://twitter.com">Twitter</a>
"#;
 
let mut parser = Parser::new(html.to_string());
 
// Example 1: Find all links with href attribute (any value)
let links_with_href = parser.parse_tags_with_attr("a".to_string(), "href", None);
// Returns all three links
 
// Example 2: Find links with class="social"
let social_links = parser.parse_tags_with_attr("a".to_string(), "class", Some("social"));
// Returns only: <a class="social" href="https://twitter.com">Twitter</a>
 
// Example 3: Find links to a specific URL
let github_links = parser.parse_tags_with_attr("a".to_string(), "href", Some("https://github.com"));
// Returns only: <a href="https://github.com">GitHub</a>
§Command Line Usage

When using the CLI tool, you can filter by attributes like this:

# Find all links with href attribute
tagparser "<html>...</html>" "a" "href"
 
# Find all links whose href is exactly "https://github.com"
tagparser "<html>...</html>" "a" "href" "https://github.com"
Source

pub fn extract_tag_content(&mut self, tag: String) -> Vec<String>

Extracts the inner content from HTML tags of the specified type.

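This method returns everything between the opening and closing tags, without the enclosing tags themselves or their attributes. Nested HTML markup inside the tag is preserved as-is (for example, a <strong> element inside a <p> is returned as part of the content).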

§Arguments
  • tag - The HTML tag name to search for (e.g., “a”, “p”, “div”)
§Returns

A vector of strings containing the inner content of all matching tags, including any nested markup.

§Examples
let html = r#"
    <a href="https://github.com">GitHub</a>
    <p>This is a <strong>paragraph</strong> with some text.</p>
    <div class="container">Some content</div>
"#;
 
let mut parser = Parser::new(html.to_string());
 
// Extract content from links
let link_texts = parser.extract_tag_content("a".to_string());
assert_eq!(link_texts, vec!["GitHub"]);
 
// Extract content from paragraphs (includes nested HTML)
let paragraph_texts = parser.extract_tag_content("p".to_string());
assert_eq!(paragraph_texts, vec!["This is a <strong>paragraph</strong> with some text."]);
 
// Extract content from divs
let div_texts = parser.extract_tag_content("div".to_string());
assert_eq!(div_texts, vec!["Some content"]);
Source

pub fn extract_attribute_values( &mut self, tag: String, attr_name: &str, ) -> Vec<String>

Extracts attribute values from HTML tags of the specified type

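This method returns the values of the specified attribute from all matching tags. Matching tags that do not have the attribute are skipped, so the result may contain fewer entries than there are matching tags.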

§Arguments
  • tag - The HTML tag name to search for (e.g., “a”, “img”, “div”)
  • attr_name - The attribute name to extract values from (e.g., “href”, “src”, “class”)
§Returns

A vector of strings containing the attribute values from all matching tags. Returns an empty vector if no matching tags or attributes are found.

§Examples
let html = r#"
    <a href="https://github.com">GitHub</a>
    <a href="https://rust-lang.org" class="official">Rust</a>
    <a class="social" href="https://twitter.com">Twitter</a>
"#;
 
let mut parser = Parser::new(html.to_string());
 
// Extract all href values from links
let hrefs = parser.extract_attribute_values("a".to_string(), "href");
assert_eq!(
    vec!["https://github.com", "https://rust-lang.org", "https://twitter.com"],
    hrefs
);
 
// Extract all class values from links
let classes = parser.extract_attribute_values("a".to_string(), "class");
assert_eq!(
    vec!["official", "social"],
    classes
);

Auto Trait Implementations§

§

impl Freeze for Parser

§

impl RefUnwindSafe for Parser

§

impl Send for Parser

§

impl Sync for Parser

§

impl Unpin for Parser

§

impl UnwindSafe for Parser

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.