tagparser/
lib.rs

1pub mod parser;
2pub use crate::parser::Parser;
3
4/// Extract html tag from page
5/// # Examples
6/// ```
7///     use tagparser::parse_tags;
8///
9///     let html = "<p>Test</p><a href='https://github.com/tenqz/'>Test Link 1</a><p>Another Text</p><a href='https://github.com/tenqz/'>Test Link 2</a><p>Another Text</p><a class='test' href='https://github.com/tenqz/'>Test Link 3</a><p>Another Text</p>".to_string();
10///     let tag_a = "a".to_string();
11///     let tag_p = "p".to_string();
12///     let tags_a = parse_tags(html.clone(), tag_a);
13///     let tags_p = parse_tags(html, tag_p);
14///     assert_eq!(
15///        vec![
16///            "<a href='https://github.com/tenqz/'>Test Link 1</a>".to_string(),
17///            "<a href='https://github.com/tenqz/'>Test Link 2</a>".to_string(),
18///            "<a class='test' href='https://github.com/tenqz/'>Test Link 3</a>".to_string()
19///        ],
20///        tags_a
21///    );
22///     assert_eq!(
23///        vec![
24///             "<p>Test</p>".to_string(),
25///             "<p>Another Text</p>".to_string(),
26///             "<p>Another Text</p>".to_string(),
27///             "<p>Another Text</p>".to_string()
28///        ],
29///        tags_p
30///     )
31///
32/// ```
33pub fn parse_tags(html: String, tag: String) -> Vec<String> {
34    let mut parser = Parser::new(html);
35    parser.parse_tags(tag)
36}
37
38/// Extract HTML tags with specific attribute from page
39/// 
40/// This function allows you to filter HTML tags not only by tag name but also by their attributes.
41/// You can search for tags with a specific attribute (like "href" or "class") and optionally
42/// filter by the exact value of that attribute.
43/// 
44/// # Arguments
45/// 
46/// * `html` - HTML content to parse
47/// * `tag` - The HTML tag name to search for (e.g., "a", "div", "img")
48/// * `attr_name` - The attribute name to filter by (e.g., "href", "class", "id")
49/// * `attr_value` - Optional attribute value to filter by:
50///   - Pass `None` to find all tags with the specified attribute regardless of value
51///   - Pass `Some("value")` to find only tags where the attribute equals "value"
52/// 
53/// # Examples
54/// 
55/// Basic usage - finding all links with href attribute:
56/// 
57/// ```
58///     use tagparser::parse_tags_with_attr;
59///
60///     let html = "<p>Test</p><a href='https://github.com/tenqz/'>Test Link 1</a><p>Another Text</p><a href='https://example.com/'>Test Link 2</a><p>Another Text</p><a class='test' href='https://github.com/tenqz/'>Test Link 3</a><p>Another Text</p>".to_string();
61///     
62///     // Find all 'a' tags with 'href' attribute
63///     let tags_with_href = parse_tags_with_attr(html.clone(), "a".to_string(), "href", None);
64///     assert_eq!(
65///        vec![
66///            "<a href='https://github.com/tenqz/'>Test Link 1</a>".to_string(),
67///            "<a href='https://example.com/'>Test Link 2</a>".to_string(),
68///            "<a class='test' href='https://github.com/tenqz/'>Test Link 3</a>".to_string()
69///        ],
70///        tags_with_href
71///     );
72///     
73///     // Find all 'a' tags with 'class' attribute with value 'test'
74///     let tags_with_class_test = parse_tags_with_attr(html.clone(), "a".to_string(), "class", Some("test"));
75///     assert_eq!(
76///        vec![
77///            "<a class='test' href='https://github.com/tenqz/'>Test Link 3</a>".to_string()
78///        ],
79///        tags_with_class_test
80///     );
81/// ```
82/// 
83/// # Common Use Cases
84/// 
85/// 1. Extract all links to a specific domain:
86///    ```
87///    # use tagparser::parse_tags_with_attr;
88///    # let html = "<a href='https://github.com'>Link</a>".to_string();
89///    let github_links = parse_tags_with_attr(html, "a".to_string(), "href", Some("https://github.com"));
90///    ```
91/// 
92/// 2. Find all images with a specific class:
93///    ```
94///    # use tagparser::parse_tags_with_attr;
95///    # let html = "<img class='gallery' src='image.jpg'>".to_string();
96///    let gallery_images = parse_tags_with_attr(html, "img".to_string(), "class", Some("gallery"));
97///    ```
98/// 
99/// 3. Extract all input fields of a form:
100///    ```
101///    # use tagparser::parse_tags_with_attr;
102///    # let html = "<input name='username'><input name='password'>".to_string();
103///    let form_inputs = parse_tags_with_attr(html, "input".to_string(), "name", None);
104///    ```
105pub fn parse_tags_with_attr(html: String, tag: String, attr_name: &str, attr_value: Option<&str>) -> Vec<String> {
106    let mut parser = Parser::new(html);
107    parser.parse_tags_with_attr(tag, attr_name, attr_value)
108}
109
110/// Extract the text content from inside HTML tags
111/// 
112/// This function extracts only the text content between the opening and closing tags,
113/// without the tags themselves or any HTML attributes.
114/// 
115/// # Arguments
116/// 
117/// * `html` - HTML content to parse
118/// * `tag` - The HTML tag name to search for (e.g., "a", "p", "div")
119/// 
120/// # Returns
121/// 
122/// A vector of strings containing the text content of all matching tags
123/// 
124/// # Examples
125/// 
126/// Basic usage - extracting text from links and paragraphs:
127/// 
128/// ```
129///     use tagparser::extract_tag_content;
130///
131///     let html = r#"
132///         <a href='https://github.com'>GitHub</a>
133///         <p>This is a <strong>paragraph</strong> with text.</p>
134///         <a href='https://rust-lang.org'>Rust Language</a>
135///     "#.to_string();
136///     
137///     // Extract text from all links
138///     let link_texts = extract_tag_content(html.clone(), "a".to_string());
139///     assert_eq!(
140///         vec!["GitHub", "Rust Language"],
141///         link_texts
142///     );
143///     
144///     // Extract text from paragraphs (includes nested HTML)
145///     let paragraph_texts = extract_tag_content(html.clone(), "p".to_string());
146///     assert_eq!(
147///         vec!["This is a <strong>paragraph</strong> with text."],
148///         paragraph_texts
149///     );
150/// ```
151/// 
152/// # Common Use Cases
153/// 
154/// 1. Extract link text without HTML:
155///    ```
156///    # use tagparser::extract_tag_content;
157///    # let html = "<a href='https://example.com'>Visit Example</a>".to_string();
158///    let link_texts = extract_tag_content(html, "a".to_string());
159///    // Returns: ["Visit Example"]
160///    ```
161/// 
162/// 2. Extract headings from a page:
163///    ```
164///    # use tagparser::extract_tag_content;
165///    # let html = "<h1>Main Title</h1><h2>Subtitle</h2>".to_string();
166///    let headings = extract_tag_content(html, "h1".to_string());
167///    // Returns: ["Main Title"]
168///    ```
169/// 
170/// 3. Extract list items:
171///    ```
172///    # use tagparser::extract_tag_content;
173///    # let html = "<ul><li>Item 1</li><li>Item 2</li></ul>".to_string();
174///    let items = extract_tag_content(html, "li".to_string());
175///    // Returns: ["Item 1", "Item 2"]
176///    ```
177pub fn extract_tag_content(html: String, tag: String) -> Vec<String> {
178    let mut parser = Parser::new(html);
179    parser.extract_tag_content(tag)
180}
181
182/// Extract attribute values from HTML tags
183/// 
184/// This function extracts the values of a specified attribute from all matching tags.
185/// 
186/// # Arguments
187/// 
188/// * `html` - HTML content to parse
189/// * `tag` - The HTML tag name to search for (e.g., "a", "img", "div")
190/// * `attr_name` - The attribute name to extract values from (e.g., "href", "src", "class")
191/// 
192/// # Returns
193/// 
194/// A vector of strings containing the attribute values from all matching tags
195/// 
196/// # Examples
197/// 
198/// Basic usage - extracting href values from links:
199/// 
200/// ```
201///     use tagparser::extract_attribute_values;
202///
203///     let html = r#"
204///         <a href='https://github.com'>GitHub</a>
205///         <a href='https://rust-lang.org' class='official'>Rust</a>
206///         <a class='social' href='https://twitter.com'>Twitter</a>
207///     "#.to_string();
208///     
209///     // Extract all href values from links
210///     let hrefs = extract_attribute_values(html.clone(), "a".to_string(), "href");
211///     assert_eq!(
212///         vec!["https://github.com", "https://rust-lang.org", "https://twitter.com"],
213///         hrefs
214///     );
215///     
216///     // Extract all class values from links
217///     let classes = extract_attribute_values(html.clone(), "a".to_string(), "class");
218///     assert_eq!(
219///         vec!["official", "social"],
220///         classes
221///     );
222/// ```
223/// 
224/// # Common Use Cases
225/// 
226/// 1. Extract all URLs from a page:
227///    ```
228///    # use tagparser::extract_attribute_values;
229///    # let html = "<a href='https://example.com'>Link</a>".to_string();
230///    let urls = extract_attribute_values(html, "a".to_string(), "href");
231///    // Returns: ["https://example.com"]
232///    ```
233/// 
234/// 2. Extract all image sources:
235///    ```
236///    # use tagparser::extract_attribute_values;
237///    # let html = "<img src='image.jpg' alt='Image'><img src='logo.png'>".to_string();
238///    let image_sources = extract_attribute_values(html, "img".to_string(), "src");
239///    // Returns: ["image.jpg", "logo.png"]
240///    ```
241/// 
242/// 3. Extract all form input names:
243///    ```
244///    # use tagparser::extract_attribute_values;
245///    # let html = "<input name='username'><input name='password'>".to_string();
246///    let input_names = extract_attribute_values(html, "input".to_string(), "name");
247///    // Returns: ["username", "password"]
248///    ```
249pub fn extract_attribute_values(html: String, tag: String, attr_name: &str) -> Vec<String> {
250    let mut parser = Parser::new(html);
251    parser.extract_attribute_values(tag, attr_name)
252}