html2md/
iframes.rs

1use std::sync::LazyLock;
2
3use super::StructuredPrinter;
4use super::TagHandler;
5
6use crate::common::get_tag_attr;
7use crate::dummy::IdentityHandler;
8
9use markup5ever_rcdom::Handle;
10use regex::Regex;
11
12/// Pattern that detects iframes with Youtube embedded videos<br/>
13/// Examples:
14/// * `https://www.youtube.com/embed/zE-dmXZp3nU?wmode=opaque`
15/// * `https://www.youtube-nocookie.com/embed/5yo6exIypkY`
16/// * `https://www.youtube.com/embed/TXm6IXrbQuM`
17static YOUTUBE_PATTERN: LazyLock<Regex> =
18    LazyLock::new(|| Regex::new(r"www\.youtube(?:-nocookie)?\.com/embed/([-\w]+)").unwrap());
19/// Pattern that detects iframes with Instagram embedded photos<br/>
20/// Examples:
21/// * `https://www.instagram.com/p/B1BKr9Wo8YX/embed/`
22/// * `https://www.instagram.com/p/BpKjlo-B4uI/embed/`
23static INSTAGRAM_PATTERN: LazyLock<Regex> =
24    LazyLock::new(|| Regex::new(r"www\.instagram\.com/p/([-\w]+)/embed").unwrap());
25/// Patter that detects iframes with VKontakte embedded videos<br/>
26/// Examples:
27/// * `https://vk.com/video_ext.php?oid=-49423435&id=456245092&hash=e1611aefe899c4f8`
28/// * `https://vk.com/video_ext.php?oid=-76477496&id=456239454&hash=ebfdc2d386617b97`
29static VK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
30    Regex::new(r"vk\.com/video_ext\.php\?oid=(-?\d+)&id=(\d+)&hash=(.*)").unwrap()
31});
32#[allow(dead_code)]
33static YANDEX_MUSIC_TRACK_PATTERN: LazyLock<Regex> =
34    LazyLock::new(|| Regex::new(r"https://music.yandex.ru/iframe/#track/(\d+)/(\d+)").unwrap());
35#[allow(dead_code)]
36static YANDEX_MUSIC_ALBUM_PATTERN: LazyLock<Regex> =
37    LazyLock::new(|| Regex::new(r"https://music.yandex.ru/iframe/#album/(\d+)").unwrap());
38
/// Stateless tag handler for `<iframe>` elements.
///
/// Its `TagHandler` implementation turns recognised YouTube, Instagram and VK
/// embeds into markdown image links and hands every other iframe to
/// `IdentityHandler`.
#[derive(Debug, Default)]
pub struct IframeHandler;
41
42impl TagHandler for IframeHandler {
43    fn handle(&mut self, tag: &Handle, printer: &mut StructuredPrinter) {
44        printer.insert_newline();
45        printer.insert_newline();
46
47        let src = get_tag_attr(tag, "src");
48        //let width = get_tag_attr(tag, "width");
49        //let height = get_tag_attr(tag, "height");
50
51        if src.is_none() {
52            return;
53        }
54
55        let src = src.unwrap();
56
57        if let Some(capture) = YOUTUBE_PATTERN.captures(&src) {
58            let media_id = capture.get(1).map_or("", |m| m.as_str());
59            printer.append_str(&format!("[![Embedded YouTube video](https://img.youtube.com/vi/{mid}/0.jpg)](https://www.youtube.com/watch?v={mid})", mid = media_id));
60            return;
61        }
62
63        if let Some(capture) = INSTAGRAM_PATTERN.captures(&src) {
64            let media_id = capture.get(1).map_or("", |m| m.as_str());
65            printer.append_str(&format!("[![Embedded Instagram post](https://www.instagram.com/p/{mid}/media/?size=m)](https://www.instagram.com/p/{mid}/embed/)", mid = media_id));
66            return;
67        }
68
69        if let Some(capture) = VK_PATTERN.captures(&src) {
70            let owner_id = capture.get(1).map_or("", |m| m.as_str());
71            let video_id = capture.get(2).map_or("", |m| m.as_str());
72            let _hash = capture.get(3).map_or("", |m| m.as_str());
73            printer.append_str(&format!("[![Embedded VK video](https://st.vk.com/images/icons/video_empty_2x.png)](https://vk.com/video{oid}_{vid})", oid = owner_id, vid = video_id));
74            return;
75        }
76
77        // not found, use generic implementation
78        let mut identity = IdentityHandler;
79        identity.handle(tag, printer);
80    }
81
82    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
83        printer.insert_newline();
84        printer.insert_newline();
85    }
86
87    fn skip_descendants(&self) -> bool {
88        true
89    }
90}