1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
//! This module implements parsers to extract hyperlinks and image elements //! from HTML text input. The parsers in this module search for HTML only, //! no other markup languages are recognized. #![allow(dead_code)] use crate::parser::image::html_img_link; use nom::bytes::complete::take_till; use nom::character::complete::anychar; use parse_hyperlinks::parser::html::html_text2dest_link; use parse_hyperlinks::parser::Link; /// Consumes the input until the parser finds an HTML formatted _inline image_ (`Link::Image`). /// /// The parser consumes the finding and returns /// `Ok((remaining_input, (skipped_input, Link)))` or some error. /// /// /// # HTML /// /// ``` /// use parse_hyperlinks::parser::Link; /// use parse_hyperlinks_html::parser::parse::take_img_link; /// use std::borrow::Cow; /// /// let i = r#"abc<img src="destination1" alt="text1">abc /// abc<img src="destination2" alt="text2">abc /// "#; /// /// let (i, r) = take_img_link(i).unwrap(); /// assert_eq!(r.0, "abc"); /// assert_eq!(r.1, Link::Image(Cow::from("text1"), Cow::from("destination1"))); /// let (i, r) = take_img_link(i).unwrap(); /// assert_eq!(r.0, "abc\nabc"); /// assert_eq!(r.1, Link::Image(Cow::from("text2"), Cow::from("destination2"))); /// ``` pub fn take_img_link(i: &str) -> nom::IResult<&str, (&str, Link)> { let mut j = i; let mut skip_count = 0; let res = loop { // Start searching for inline images. // Regular `Link::Image` can start everywhere. if let Ok((k, r)) = html_img_link(j) { break (k, r); }; // This makes sure that we advance. let (k, _) = anychar(j)?; skip_count += j.len() - k.len(); j = k; // This might not consume bytes and never fails. let (k, _) = take_till(|c| c == '<')(j)?; skip_count += j.len() - k.len(); j = k; }; // We found a link. Return it. let (l, link) = res; let skipped_input = &i[0..skip_count]; Ok((l, (skipped_input, link))) } /// Consumes the input until the parser finds an HTML formatted hyperlink _text2dest_ /// (`Link::Text2Dest`). /// /// The parser consumes the finding and returns /// `Ok((remaining_input, (skipped_input, Link)))` or some error. /// /// /// # HTML /// /// ``` /// use parse_hyperlinks::parser::Link; /// use parse_hyperlinks_html::parser::parse::take_link; /// use std::borrow::Cow; /// /// let i = "abc<a href=\"dest1\" title=\"title1\">text1</a>abc\ /// abc<a href=\"dest2\" title=\"title2\">text2</a>abc"; /// /// let (i, r) = take_link(i).unwrap(); /// assert_eq!(r.0, "abc"); /// assert_eq!(r.1, Link::Text2Dest(Cow::from("text1"), Cow::from("dest1"), Cow::from("title1"))); /// let (i, r) = take_link(i).unwrap(); /// assert_eq!(r.0, "abcabc"); /// assert_eq!(r.1, Link::Text2Dest(Cow::from("text2"), Cow::from("dest2"), Cow::from("title2"))); /// ``` pub fn take_link(i: &str) -> nom::IResult<&str, (&str, Link)> { let mut j = i; let mut skip_count = 0; let res = loop { // Start searching for inline hyperlinks. // Regular `Link::Text2Dest` can start everywhere. if let Ok((k, r)) = html_text2dest_link(j) { break (k, r); }; // This makes sure that we advance. let (k, _) = anychar(j)?; skip_count += j.len() - k.len(); j = k; // This might not consume bytes and never fails. let (k, _) = take_till(|c| c == '<')(j)?; skip_count += j.len() - k.len(); j = k; }; // We found a link. Return it. let (l, link) = res; let skipped_input = &i[0..skip_count]; Ok((l, (skipped_input, link))) }