shindan_maker/
html_utils.rs

1use anyhow::{Context, Result};
2use scraper::{Html, Node};
3use serde_json::json;
4
5use crate::selectors::SELECTORS;
6
7#[cfg(feature = "segments")]
8use crate::segment::{Segment, Segments};
9
10#[cfg(feature = "html")]
11use {
12    crate::html_template::{APP_JS, CHART_JS, build_html},
13    anyhow::anyhow,
14    scraper::Element,
15};
16
/// Converts the result section of a response page into a list of segments.
///
/// The first element matched by `SELECTORS.post_display` is located and its
/// direct children are walked in document order:
///
/// * text nodes become `"text"` segments (with any literal `&nbsp;` substring
///   replaced by a plain space),
/// * `<br>` elements become `"text"` segments containing a single `"\n"`,
/// * `<img>` elements become `"image"` segments whose `file` is the value of
///   the `data-src` attribute,
/// * every other node is ignored.
///
/// # Errors
///
/// Returns an error if no element matches `SELECTORS.post_display`, or if an
/// `<img>` child has no `data-src` attribute.
#[cfg(feature = "segments")]
pub(crate) fn get_segments(response_text: &str) -> Result<Segments> {
    let result_document = Html::parse_document(response_text);

    let post_display = result_document
        .select(&SELECTORS.post_display)
        .next()
        .context("Failed to get the next element")?;

    let mut segments = Vec::new();

    // A plain `for` loop (rather than `for_each`) lets `?` propagate a missing
    // attribute as an error instead of panicking inside a closure.
    for child in post_display.children() {
        match child.value() {
            Node::Text(text) => {
                let text = text.replace("&nbsp;", " ");
                segments.push(Segment::new(
                    "text",
                    json!({
                        "text": text
                    }),
                ));
            }
            Node::Element(element) => match element.name() {
                "br" => {
                    segments.push(Segment::new(
                        "text",
                        json!({
                            "text": "\n"
                        }),
                    ));
                }
                "img" => {
                    let image_url = element
                        .attr("data-src")
                        .context("Failed to get 'data-src' attribute")?;
                    segments.push(Segment::new(
                        "image",
                        json!({
                            "file": image_url
                        }),
                    ));
                }
                _ => {}
            },
            _ => {}
        }
    }

    Ok(Segments(segments))
}
68
/// Builds a standalone HTML page for a result from the raw response body.
///
/// Steps performed:
///
/// 1. Take the outer HTML of the first element matched by
///    `SELECTORS.title_and_result`.
/// 2. For every element matched by one of `SELECTORS.effects` whose next
///    sibling element is a `<noscript>`, remove the effect's markup from that
///    HTML and replace the `<noscript>` markup with its inner HTML.
/// 3. Insert the result and a `<base href="...">` tag built from `base_url`
///    into the template returned by `build_html()`.
/// 4. If `response_text` contains `"chart.js"`, fill the `<!-- SCRIPTS -->`
///    placeholder with `APP_JS`, `CHART_JS` and the first page script that
///    mentions `id`.
///
/// # Errors
///
/// Returns an error if the title/result element is missing, or if the page
/// references `chart.js` but no script containing `id` can be found.
#[cfg(feature = "html")]
pub(crate) fn get_html_str(id: &str, response_text: &str, base_url: &str) -> Result<String> {
    let document = Html::parse_document(response_text);

    let mut body = document
        .select(&SELECTORS.title_and_result)
        .next()
        .context("Failed to get the next element")?
        .html();

    for selector in &SELECTORS.effects {
        for effect in document.select(selector) {
            // Only effects directly followed by a <noscript> element are rewritten.
            let Some(sibling) = effect.next_sibling_element() else {
                continue;
            };
            if sibling.value().name() != "noscript" {
                continue;
            }

            let fallback = sibling.inner_html();
            body = body.replace(&effect.html(), "");
            body = body.replace(&sibling.html(), &fallback);
        }
    }

    let base_tag = format!(r#"<base href="{}">"#, base_url);
    let page = build_html()
        .replace("<!-- TITLE_AND_RESULT -->", &body)
        .replace("<!-- BASE_URL -->", &base_tag);

    // Pages without a chart keep the template's script placeholder untouched.
    if !response_text.contains("chart.js") {
        return Ok(page);
    }

    let scripts = [
        format!(r#"<script>{}</script>"#, APP_JS),
        format!(r#"<script>{}</script>"#, CHART_JS),
        get_first_script(&document, id)?,
    ];

    Ok(page.replace("<!-- SCRIPTS -->", &scripts.join("\n")))
}
113
/// Returns the outer HTML of the first element matched by `SELECTORS.script`
/// whose markup contains `id`.
///
/// # Errors
///
/// Returns an error if no matched element's HTML contains `id`.
#[cfg(feature = "html")]
pub(crate) fn get_first_script(result_document: &Html, id: &str) -> Result<String> {
    result_document
        .select(&SELECTORS.script)
        .map(|script| script.html())
        .find(|markup| markup.contains(id))
        .ok_or_else(|| anyhow!("Failed to find script with id {}", id))
}
125
126pub(crate) fn extract_title_and_form_data(
127    html_content: &str,
128    name: &str,
129) -> Result<(String, Vec<(String, String)>)> {
130    let document = Html::parse_document(html_content);
131    let title = extract_title(&document)?;
132    let form_data = extract_form_data(&document, name)?;
133
134    Ok((title, form_data))
135}
136
137pub(crate) fn extract_title(dom: &Html) -> Result<String> {
138    Ok(dom
139        .select(&SELECTORS.shindan_title)
140        .next()
141        .context("Failed to get the next element")?
142        .value()
143        .attr("data-shindan_title")
144        .context("Failed to get 'data-shindan_title' attribute")?
145        .to_string())
146}
147
148pub(crate) fn extract_description(dom: &Html) -> Result<String> {
149    let mut desc = Vec::new();
150
151    dom.select(&SELECTORS.shindan_description_display)
152        .next()
153        .context("Failed to get the next element")?
154        .children()
155        .for_each(|child| {
156            let node = child.value();
157            match node {
158                Node::Text(text) => {
159                    desc.push(text.to_string());
160                }
161                Node::Element(element) => {
162                    if element.name() == "br" {
163                        desc.push("\n".to_string());
164                    } else if let Some(node) = child.children().next()
165                        && let Node::Text(text) = node.value()
166                    {
167                        desc.push(text.to_string());
168                    };
169                }
170                _ => {}
171            }
172        });
173
174    Ok(desc.join(""))
175}
176
177pub(crate) fn extract_form_data(dom: &Html, name: &str) -> Result<Vec<(String, String)>> {
178    const FIELDS: &[&str] = &["_token", "randname", "type"];
179    let mut form_data = Vec::with_capacity(FIELDS.len() + 1);
180
181    for (index, &field) in FIELDS.iter().enumerate() {
182        let value = dom
183            .select(&SELECTORS.form[index])
184            .next()
185            .and_then(|element| element.value().attr("value"))
186            .unwrap_or("")
187            .to_string();
188
189        form_data.push((field.to_string(), value));
190    }
191
192    form_data.push(("user_input_value_1".to_string(), name.to_string()));
193
194    for element in dom.select(&SELECTORS.input_parts) {
195        if let Some(input_name) = element.value().attr("name") {
196            form_data.push((input_name.to_string(), name.to_string()));
197        }
198    }
199
200    Ok(form_data)
201}