tagparser/lib.rs
1pub mod parser;
2pub use crate::parser::Parser;
3
4/// Extract html tag from page
5/// # Examples
6/// ```
7/// use tagparser::parse_tags;
8///
9/// let html = "<p>Test</p><a href='https://github.com/tenqz/'>Test Link 1</a><p>Another Text</p><a href='https://github.com/tenqz/'>Test Link 2</a><p>Another Text</p><a class='test' href='https://github.com/tenqz/'>Test Link 3</a><p>Another Text</p>".to_string();
10/// let tag_a = "a".to_string();
11/// let tag_p = "p".to_string();
12/// let tags_a = parse_tags(html.clone(), tag_a);
13/// let tags_p = parse_tags(html, tag_p);
14/// assert_eq!(
15/// vec![
16/// "<a href='https://github.com/tenqz/'>Test Link 1</a>".to_string(),
17/// "<a href='https://github.com/tenqz/'>Test Link 2</a>".to_string(),
18/// "<a class='test' href='https://github.com/tenqz/'>Test Link 3</a>".to_string()
19/// ],
20/// tags_a
21/// );
22/// assert_eq!(
23/// vec![
24/// "<p>Test</p>".to_string(),
25/// "<p>Another Text</p>".to_string(),
26/// "<p>Another Text</p>".to_string(),
27/// "<p>Another Text</p>".to_string()
28/// ],
29/// tags_p
30/// )
31///
32/// ```
33pub fn parse_tags(html: String, tag: String) -> Vec<String> {
34 let mut parser = Parser::new(html);
35 parser.parse_tags(tag)
36}
37
38/// Extract HTML tags with specific attribute from page
39///
40/// This function allows you to filter HTML tags not only by tag name but also by their attributes.
41/// You can search for tags with a specific attribute (like "href" or "class") and optionally
42/// filter by the exact value of that attribute.
43///
44/// # Arguments
45///
46/// * `html` - HTML content to parse
47/// * `tag` - The HTML tag name to search for (e.g., "a", "div", "img")
48/// * `attr_name` - The attribute name to filter by (e.g., "href", "class", "id")
49/// * `attr_value` - Optional attribute value to filter by:
50/// - Pass `None` to find all tags with the specified attribute regardless of value
51/// - Pass `Some("value")` to find only tags where the attribute equals "value"
52///
53/// # Examples
54///
55/// Basic usage - finding all links with href attribute:
56///
57/// ```
58/// use tagparser::parse_tags_with_attr;
59///
60/// let html = "<p>Test</p><a href='https://github.com/tenqz/'>Test Link 1</a><p>Another Text</p><a href='https://example.com/'>Test Link 2</a><p>Another Text</p><a class='test' href='https://github.com/tenqz/'>Test Link 3</a><p>Another Text</p>".to_string();
61///
62/// // Find all 'a' tags with 'href' attribute
63/// let tags_with_href = parse_tags_with_attr(html.clone(), "a".to_string(), "href", None);
64/// assert_eq!(
65/// vec![
66/// "<a href='https://github.com/tenqz/'>Test Link 1</a>".to_string(),
67/// "<a href='https://example.com/'>Test Link 2</a>".to_string(),
68/// "<a class='test' href='https://github.com/tenqz/'>Test Link 3</a>".to_string()
69/// ],
70/// tags_with_href
71/// );
72///
73/// // Find all 'a' tags with 'class' attribute with value 'test'
74/// let tags_with_class_test = parse_tags_with_attr(html.clone(), "a".to_string(), "class", Some("test"));
75/// assert_eq!(
76/// vec![
77/// "<a class='test' href='https://github.com/tenqz/'>Test Link 3</a>".to_string()
78/// ],
79/// tags_with_class_test
80/// );
81/// ```
82///
83/// # Common Use Cases
84///
85/// 1. Extract all links to a specific domain:
86/// ```
87/// # use tagparser::parse_tags_with_attr;
88/// # let html = "<a href='https://github.com'>Link</a>".to_string();
89/// let github_links = parse_tags_with_attr(html, "a".to_string(), "href", Some("https://github.com"));
90/// ```
91///
92/// 2. Find all images with a specific class:
93/// ```
94/// # use tagparser::parse_tags_with_attr;
95/// # let html = "<img class='gallery' src='image.jpg'>".to_string();
96/// let gallery_images = parse_tags_with_attr(html, "img".to_string(), "class", Some("gallery"));
97/// ```
98///
99/// 3. Extract all input fields of a form:
100/// ```
101/// # use tagparser::parse_tags_with_attr;
102/// # let html = "<input name='username'><input name='password'>".to_string();
103/// let form_inputs = parse_tags_with_attr(html, "input".to_string(), "name", None);
104/// ```
105pub fn parse_tags_with_attr(html: String, tag: String, attr_name: &str, attr_value: Option<&str>) -> Vec<String> {
106 let mut parser = Parser::new(html);
107 parser.parse_tags_with_attr(tag, attr_name, attr_value)
108}
109
110/// Extract the text content from inside HTML tags
111///
112/// This function extracts only the text content between the opening and closing tags,
113/// without the tags themselves or any HTML attributes.
114///
115/// # Arguments
116///
117/// * `html` - HTML content to parse
118/// * `tag` - The HTML tag name to search for (e.g., "a", "p", "div")
119///
120/// # Returns
121///
122/// A vector of strings containing the text content of all matching tags
123///
124/// # Examples
125///
126/// Basic usage - extracting text from links and paragraphs:
127///
128/// ```
129/// use tagparser::extract_tag_content;
130///
131/// let html = r#"
132/// <a href='https://github.com'>GitHub</a>
133/// <p>This is a <strong>paragraph</strong> with text.</p>
134/// <a href='https://rust-lang.org'>Rust Language</a>
135/// "#.to_string();
136///
137/// // Extract text from all links
138/// let link_texts = extract_tag_content(html.clone(), "a".to_string());
139/// assert_eq!(
140/// vec!["GitHub", "Rust Language"],
141/// link_texts
142/// );
143///
144/// // Extract text from paragraphs (includes nested HTML)
145/// let paragraph_texts = extract_tag_content(html.clone(), "p".to_string());
146/// assert_eq!(
147/// vec!["This is a <strong>paragraph</strong> with text."],
148/// paragraph_texts
149/// );
150/// ```
151///
152/// # Common Use Cases
153///
154/// 1. Extract link text without HTML:
155/// ```
156/// # use tagparser::extract_tag_content;
157/// # let html = "<a href='https://example.com'>Visit Example</a>".to_string();
158/// let link_texts = extract_tag_content(html, "a".to_string());
159/// // Returns: ["Visit Example"]
160/// ```
161///
162/// 2. Extract headings from a page:
163/// ```
164/// # use tagparser::extract_tag_content;
165/// # let html = "<h1>Main Title</h1><h2>Subtitle</h2>".to_string();
166/// let headings = extract_tag_content(html, "h1".to_string());
167/// // Returns: ["Main Title"]
168/// ```
169///
170/// 3. Extract list items:
171/// ```
172/// # use tagparser::extract_tag_content;
173/// # let html = "<ul><li>Item 1</li><li>Item 2</li></ul>".to_string();
174/// let items = extract_tag_content(html, "li".to_string());
175/// // Returns: ["Item 1", "Item 2"]
176/// ```
177pub fn extract_tag_content(html: String, tag: String) -> Vec<String> {
178 let mut parser = Parser::new(html);
179 parser.extract_tag_content(tag)
180}
181
182/// Extract attribute values from HTML tags
183///
184/// This function extracts the values of a specified attribute from all matching tags.
185///
186/// # Arguments
187///
188/// * `html` - HTML content to parse
189/// * `tag` - The HTML tag name to search for (e.g., "a", "img", "div")
190/// * `attr_name` - The attribute name to extract values from (e.g., "href", "src", "class")
191///
192/// # Returns
193///
194/// A vector of strings containing the attribute values from all matching tags
195///
196/// # Examples
197///
198/// Basic usage - extracting href values from links:
199///
200/// ```
201/// use tagparser::extract_attribute_values;
202///
203/// let html = r#"
204/// <a href='https://github.com'>GitHub</a>
205/// <a href='https://rust-lang.org' class='official'>Rust</a>
206/// <a class='social' href='https://twitter.com'>Twitter</a>
207/// "#.to_string();
208///
209/// // Extract all href values from links
210/// let hrefs = extract_attribute_values(html.clone(), "a".to_string(), "href");
211/// assert_eq!(
212/// vec!["https://github.com", "https://rust-lang.org", "https://twitter.com"],
213/// hrefs
214/// );
215///
216/// // Extract all class values from links
217/// let classes = extract_attribute_values(html.clone(), "a".to_string(), "class");
218/// assert_eq!(
219/// vec!["official", "social"],
220/// classes
221/// );
222/// ```
223///
224/// # Common Use Cases
225///
226/// 1. Extract all URLs from a page:
227/// ```
228/// # use tagparser::extract_attribute_values;
229/// # let html = "<a href='https://example.com'>Link</a>".to_string();
230/// let urls = extract_attribute_values(html, "a".to_string(), "href");
231/// // Returns: ["https://example.com"]
232/// ```
233///
234/// 2. Extract all image sources:
235/// ```
236/// # use tagparser::extract_attribute_values;
237/// # let html = "<img src='image.jpg' alt='Image'><img src='logo.png'>".to_string();
238/// let image_sources = extract_attribute_values(html, "img".to_string(), "src");
239/// // Returns: ["image.jpg", "logo.png"]
240/// ```
241///
242/// 3. Extract all form input names:
243/// ```
244/// # use tagparser::extract_attribute_values;
245/// # let html = "<input name='username'><input name='password'>".to_string();
246/// let input_names = extract_attribute_values(html, "input".to_string(), "name");
247/// // Returns: ["username", "password"]
248/// ```
249pub fn extract_attribute_values(html: String, tag: String, attr_name: &str) -> Vec<String> {
250 let mut parser = Parser::new(html);
251 parser.extract_attribute_values(tag, attr_name)
252}