shindan_maker/
html_utils.rs

1use serde_json::json;
2use scraper::{Html, Node};
3use anyhow::{Context, Result};
4
5use crate::selectors::SELECTORS;
6
7#[cfg(feature = "segments")]
8use crate::segment::{Segment, Segments};
9
10#[cfg(feature = "html")]
11use {
12    anyhow::anyhow,
13    scraper::Element,
14    crate::html_template::HTML_TEMPLATE,
15};
16
/// Converts the first element matched by `SELECTORS.post_display` into a list
/// of segments.
///
/// Only the direct children of that element are inspected:
/// - text nodes become `"text"` segments (literal `&nbsp;` sequences are
///   replaced by a plain space),
/// - `<br>` elements become a `"text"` segment containing a newline,
/// - `<img>` elements become an `"image"` segment whose `file` is the value
///   of the `data-src` attribute,
/// - every other node is ignored.
///
/// # Errors
///
/// Returns an error if no element matches `SELECTORS.post_display`, or if an
/// `<img>` child has no `data-src` attribute.
#[cfg(feature = "segments")]
pub(crate) fn get_segments(response_text: &str) -> Result<Segments> {
    let result_document = Html::parse_document(response_text);

    let post_display = result_document
        .select(&SELECTORS.post_display)
        .next()
        .context("Failed to get the next element")?;

    let mut segments = Vec::new();

    // A plain `for` loop (rather than `for_each`) lets `?` propagate a missing
    // attribute to the caller instead of panicking inside a closure.
    for child in post_display.children() {
        match child.value() {
            Node::Text(text) => {
                let text = text.replace("&nbsp;", " ");
                segments.push(Segment::new("text", json!({ "text": text })));
            }
            Node::Element(element) => match element.name() {
                "br" => {
                    let text = "\n".to_string();
                    segments.push(Segment::new("text", json!({ "text": text })));
                }
                "img" => {
                    let image_url = element
                        .attr("data-src")
                        .context("Failed to get 'data-src' attribute")?
                        .to_string();
                    segments.push(Segment::new("image", json!({ "file": image_url })));
                }
                _ => {}
            },
            _ => {}
        }
    }

    Ok(Segments(segments))
}
55
/// Renders the result page as a standalone HTML string based on
/// `HTML_TEMPLATE`.
///
/// The markup of the first element matching `SELECTORS.title_and_result` is
/// taken as the page body. For every element matched by one of
/// `SELECTORS.effects` whose next sibling element is a `<noscript>`, the
/// effect's markup is removed from the body and the `<noscript>` wrapper is
/// replaced by its inner HTML.
///
/// When `response_text` contains `"chart.js"`, the `<!-- SCRIPTS -->`
/// placeholder is filled with two fixed script tags plus the script returned
/// by `get_first_script` for `id`.
///
/// # Errors
///
/// Returns an error if nothing matches `SELECTORS.title_and_result`, or if
/// scripts are required and `get_first_script` fails.
#[cfg(feature = "html")]
pub(crate) fn get_html_str(id: &str, response_text: &str) -> Result<String> {
    let document = Html::parse_document(response_text);

    let container = document
        .select(&SELECTORS.title_and_result)
        .next()
        .context("Failed to get the next element")?;
    let mut body = container.html();

    for selector in &SELECTORS.effects {
        for effect in document.select(selector) {
            // Only effects immediately followed by a <noscript> fallback are rewritten.
            let fallback = match effect.next_sibling_element() {
                Some(sibling) if sibling.value().name() == "noscript" => sibling,
                _ => continue,
            };
            let fallback_content = fallback.inner_html();

            body = body
                .replace(&effect.html(), "")
                .replace(&fallback.html(), &fallback_content);
        }
    }

    let page = HTML_TEMPLATE.replace("<!-- TITLE_AND_RESULT -->", &body);

    if !response_text.contains("chart.js") {
        return Ok(page);
    }

    let shindan_script = get_first_script(&document, id)?;
    let scripts = [
        r#"<script src="https://cn.shindanmaker.com/js/app.js?id=163959a7e23bfa7264a0ddefb3c36f13" defer=""></script>"#,
        r#"<script src="https://cn.shindanmaker.com/js/chart.js?id=391e335afc72362acd6bf1ea1ba6b74c" defer=""></script>"#,
        shindan_script.as_str(),
    ];

    Ok(page.replace("<!-- SCRIPTS -->", &scripts.join("\n")))
}
94
/// Returns the markup of the first element matched by `SELECTORS.script`
/// whose HTML contains `id`.
///
/// # Errors
///
/// Returns an error if no matched element's markup contains `id`.
#[cfg(feature = "html")]
pub(crate) fn get_first_script(result_document: &Html, id: &str) -> Result<String> {
    result_document
        .select(&SELECTORS.script)
        .map(|element| element.html())
        .find(|markup| markup.contains(id))
        .ok_or_else(|| anyhow!("Failed to find script with id {}", id))
}
106
107pub(crate) fn extract_title_and_form_data(html_content: &str, name: &str) -> Result<(String, Vec<(&'static str, String)>)> {
108    let document = Html::parse_document(html_content);
109    let title = extract_title(&document)?;
110    let form_data = extract_form_data(&document, name)?;
111
112    Ok((title, form_data))
113}
114
115pub(crate) fn extract_title(dom: &Html) -> Result<String> {
116    Ok(dom
117        .select(&SELECTORS.shindan_title)
118        .next()
119        .context("Failed to get the next element")?
120        .value().attr("data-shindan_title")
121        .context("Failed to get 'data-shindan_title' attribute")?
122        .to_string())
123}
124
125pub(crate) fn extract_description(dom: &Html) -> Result<String> {
126    let mut desc = Vec::new();
127
128    dom
129        .select(&SELECTORS.shindan_description_display)
130        .next()
131        .context("Failed to get the next element")?
132        .children()
133        .for_each(|child| {
134            let node = child.value();
135            match node {
136                Node::Text(text) => {
137                    desc.push(text.to_string());
138                }
139                Node::Element(element) => {
140                    if element.name() == "br" {
141                        desc.push("\n".to_string());
142                    } else if let Some(node) = child.children().next() {
143                        if let Node::Text(text) = node.value() {
144                            desc.push(text.to_string());
145                        };
146                    }
147                }
148                _ => {}
149            }
150        });
151
152    Ok(desc.join(""))
153}
154
155pub(crate) fn extract_form_data(
156    dom: &Html,
157    name: &str,
158) -> Result<Vec<(&'static str, String)>> {
159    const FIELDS: &[&str] = &["_token", "randname", "type"];
160    let mut form_data = Vec::with_capacity(FIELDS.len() + 1);
161
162    for (index, &field) in FIELDS.iter().enumerate() {
163        let value = dom
164            .select(&SELECTORS.form[index])
165            .next()
166            .context("Failed to get the next element")?
167            .value()
168            .attr("value")
169            .context("Failed to get value attribute")?;
170
171        form_data.push((field, value.to_string()));
172    }
173
174    form_data.push(("user_input_value_1", name.to_string()));
175
176    Ok(form_data)
177}