html2md/
iframes.rs

1use std::sync::LazyLock;
2
3use super::StructuredPrinter;
4use super::TagHandler;
5
6use crate::common::get_tag_attr;
7use crate::dummy::IdentityHandler;
8use crate::markup5ever_rcdom;
9
10use markup5ever_rcdom::Handle;
11use regex::Regex;
12
13/// Pattern that detects iframes with Youtube embedded videos<br/>
14/// Examples:
15/// * `https://www.youtube.com/embed/zE-dmXZp3nU?wmode=opaque`
16/// * `https://www.youtube-nocookie.com/embed/5yo6exIypkY`
17/// * `https://www.youtube.com/embed/TXm6IXrbQuM`
18static YOUTUBE_PATTERN: LazyLock<Regex> =
19    LazyLock::new(|| Regex::new(r"www\.youtube(?:-nocookie)?\.com/embed/([-\w]+)").unwrap());
20/// Pattern that detects iframes with Instagram embedded photos<br/>
21/// Examples:
22/// * `https://www.instagram.com/p/B1BKr9Wo8YX/embed/`
23/// * `https://www.instagram.com/p/BpKjlo-B4uI/embed/`
24static INSTAGRAM_PATTERN: LazyLock<Regex> =
25    LazyLock::new(|| Regex::new(r"www\.instagram\.com/p/([-\w]+)/embed").unwrap());
26/// Patter that detects iframes with VKontakte embedded videos<br/>
27/// Examples:
28/// * `https://vk.com/video_ext.php?oid=-49423435&id=456245092&hash=e1611aefe899c4f8`
29/// * `https://vk.com/video_ext.php?oid=-76477496&id=456239454&hash=ebfdc2d386617b97`
30static VK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
31    Regex::new(r"vk\.com/video_ext\.php\?oid=(-?\d+)&id=(\d+)&hash=(.*)").unwrap()
32});
33#[allow(dead_code)]
34static YANDEX_MUSIC_TRACK_PATTERN: LazyLock<Regex> =
35    LazyLock::new(|| Regex::new(r"https://music.yandex.ru/iframe/#track/(\d+)/(\d+)").unwrap());
36#[allow(dead_code)]
37static YANDEX_MUSIC_ALBUM_PATTERN: LazyLock<Regex> =
38    LazyLock::new(|| Regex::new(r"https://music.yandex.ru/iframe/#album/(\d+)").unwrap());
39
/// Tag handler for `<iframe>` elements.
///
/// Its [`TagHandler`] implementation renders recognised YouTube, Instagram and VK embeds
/// as Markdown image links and hands every other iframe to `IdentityHandler`.
/// The type carries no state, so `IframeHandler::default()` is all that is needed to build one.
#[derive(Debug, Default)]
pub struct IframeHandler;
42
43impl TagHandler for IframeHandler {
44    fn handle(&mut self, tag: &Handle, printer: &mut StructuredPrinter) {
45        printer.insert_newline();
46        printer.insert_newline();
47
48        let src = get_tag_attr(tag, "src");
49        //let width = get_tag_attr(tag, "width");
50        //let height = get_tag_attr(tag, "height");
51
52        if src.is_none() {
53            return;
54        }
55
56        let src = src.unwrap();
57
58        if let Some(capture) = YOUTUBE_PATTERN.captures(&src) {
59            let media_id = capture.get(1).map_or("", |m| m.as_str());
60            printer.append_str(&format!("[![Embedded YouTube video](https://img.youtube.com/vi/{mid}/0.jpg)](https://www.youtube.com/watch?v={mid})", mid = media_id));
61            return;
62        }
63
64        if let Some(capture) = INSTAGRAM_PATTERN.captures(&src) {
65            let media_id = capture.get(1).map_or("", |m| m.as_str());
66            printer.append_str(&format!("[![Embedded Instagram post](https://www.instagram.com/p/{mid}/media/?size=m)](https://www.instagram.com/p/{mid}/embed/)", mid = media_id));
67            return;
68        }
69
70        if let Some(capture) = VK_PATTERN.captures(&src) {
71            let owner_id = capture.get(1).map_or("", |m| m.as_str());
72            let video_id = capture.get(2).map_or("", |m| m.as_str());
73            let _hash = capture.get(3).map_or("", |m| m.as_str());
74            printer.append_str(&format!("[![Embedded VK video](https://st.vk.com/images/icons/video_empty_2x.png)](https://vk.com/video{oid}_{vid})", oid = owner_id, vid = video_id));
75            return;
76        }
77
78        // not found, use generic implementation
79        let mut identity = IdentityHandler;
80        identity.handle(tag, printer);
81    }
82
83    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
84        printer.insert_newline();
85        printer.insert_newline();
86    }
87
88    fn skip_descendants(&self) -> bool {
89        true
90    }
91}