shindan_maker/
html_utils.rs

1use anyhow::{Context, Result};
2use scraper::{Html, Node};
3use serde_json::json;
4
5use crate::selectors::SELECTORS;
6
7#[cfg(feature = "segments")]
8use crate::segment::{Segment, Segments};
9
10#[cfg(feature = "html")]
11use {crate::html_template::HTML_TEMPLATE, anyhow::anyhow, scraper::Element};
12
/// Parses a result page and converts the direct children of the first element
/// matching `SELECTORS.post_display` into [`Segments`].
///
/// Conversion rules, applied to each direct child in document order:
/// - a text node becomes a `"text"` segment (any literal `&nbsp;` left in the
///   parsed text is replaced by a plain space);
/// - a `<br>` element becomes a `"text"` segment containing `"\n"`;
/// - an `<img>` element becomes an `"image"` segment whose `file` is the value
///   of its `data-src` attribute;
/// - every other node is skipped.
///
/// # Errors
///
/// Returns an error when no element matches `SELECTORS.post_display`, or when
/// an `<img>` child has no `data-src` attribute. The latter is reported as an
/// error rather than a panic because the markup comes from a remote page.
#[cfg(feature = "segments")]
pub(crate) fn get_segments(response_text: &str) -> Result<Segments> {
    let result_document = Html::parse_document(response_text);

    let post_display = result_document
        .select(&SELECTORS.post_display)
        .next()
        .context("Failed to get the next element")?;

    let mut segments = Vec::new();

    // A plain loop (instead of `for_each`) lets `?` propagate failures.
    for child in post_display.children() {
        match child.value() {
            Node::Text(text) => {
                let text = text.replace("&nbsp;", " ");
                segments.push(Segment::new("text", json!({ "text": text })));
            }
            Node::Element(element) => match element.name() {
                "br" => {
                    segments.push(Segment::new("text", json!({ "text": "\n" })));
                }
                "img" => {
                    let image_url = element
                        .attr("data-src")
                        .context("Failed to get 'data-src' attribute")?;
                    segments.push(Segment::new("image", json!({ "file": image_url })));
                }
                _ => {}
            },
            _ => {}
        }
    }

    Ok(Segments(segments))
}
64
65#[cfg(feature = "html")]
66pub(crate) fn get_html_str(id: &str, response_text: &str, base_url: &str) -> Result<String> {
67    let result_document = Html::parse_document(response_text);
68
69    let mut title_and_result = result_document
70        .select(&SELECTORS.title_and_result)
71        .next()
72        .context("Failed to get the next element")?
73        .html();
74
75    for effects_selector in &SELECTORS.effects {
76        let effects = result_document.select(effects_selector);
77        for effect in effects {
78            if let Some(next_el) = effect.next_sibling_element() {
79                if next_el.value().name() == "noscript" {
80                    let content = next_el.inner_html();
81
82                    title_and_result = title_and_result
83                        .replace(&effect.html(), "")
84                        .replace(&next_el.html(), &content);
85                }
86            }
87        }
88    }
89
90    let mut html = HTML_TEMPLATE
91        .replace("<!-- TITLE_AND_RESULT -->", &title_and_result)
92        .replace(
93            "<!-- BASE_URL -->",
94            &format!(r#"<base href="{}">"#, base_url),
95        );
96
97    if response_text.contains("chart.js") {
98        let mut scripts = vec![
99            r#"<script src="/js/app.js" defer=""></script>"#,
100            r#"<script src="/js/chart.js" defer=""></script>"#,
101        ];
102
103        let shindan_script = get_first_script(&result_document, id)?;
104        scripts.push(&shindan_script);
105        html = html.replace("<!-- SCRIPTS -->", &scripts.join("\n"));
106    }
107    Ok(html)
108}
109
/// Returns the HTML of the first element matching `SELECTORS.script` whose
/// serialized HTML contains `id`.
///
/// # Errors
///
/// Returns an error when no matching element's HTML contains `id`.
#[cfg(feature = "html")]
pub(crate) fn get_first_script(result_document: &Html, id: &str) -> Result<String> {
    result_document
        .select(&SELECTORS.script)
        .map(|script| script.html())
        .find(|script_html| script_html.contains(id))
        .ok_or_else(|| anyhow!("Failed to find script with id {}", id))
}
121
122pub(crate) fn extract_title_and_form_data(
123    html_content: &str,
124    name: &str,
125) -> Result<(String, Vec<(String, String)>)> {
126    let document = Html::parse_document(html_content);
127    let title = extract_title(&document)?;
128    let form_data = extract_form_data(&document, name)?;
129
130    Ok((title, form_data))
131}
132
133pub(crate) fn extract_title(dom: &Html) -> Result<String> {
134    Ok(dom
135        .select(&SELECTORS.shindan_title)
136        .next()
137        .context("Failed to get the next element")?
138        .value()
139        .attr("data-shindan_title")
140        .context("Failed to get 'data-shindan_title' attribute")?
141        .to_string())
142}
143
144pub(crate) fn extract_description(dom: &Html) -> Result<String> {
145    let mut desc = Vec::new();
146
147    dom.select(&SELECTORS.shindan_description_display)
148        .next()
149        .context("Failed to get the next element")?
150        .children()
151        .for_each(|child| {
152            let node = child.value();
153            match node {
154                Node::Text(text) => {
155                    desc.push(text.to_string());
156                }
157                Node::Element(element) => {
158                    if element.name() == "br" {
159                        desc.push("\n".to_string());
160                    } else if let Some(node) = child.children().next()
161                        && let Node::Text(text) = node.value()
162                    {
163                        desc.push(text.to_string());
164                    };
165                }
166                _ => {}
167            }
168        });
169
170    Ok(desc.join(""))
171}
172
173pub(crate) fn extract_form_data(dom: &Html, name: &str) -> Result<Vec<(String, String)>> {
174    const FIELDS: &[&str] = &["_token", "randname", "type"];
175    let mut form_data = Vec::with_capacity(FIELDS.len() + 1);
176
177    for (index, &field) in FIELDS.iter().enumerate() {
178        let value = dom
179            .select(&SELECTORS.form[index])
180            .next()
181            .and_then(|element| element.value().attr("value"))
182            .unwrap_or("")
183            .to_string();
184
185        form_data.push((field.to_string(), value));
186    }
187
188    form_data.push(("user_input_value_1".to_string(), name.to_string()));
189
190    for element in dom.select(&SELECTORS.input_parts) {
191        if let Some(input_name) = element.value().attr("name") {
192            form_data.push((input_name.to_string(), name.to_string()));
193        }
194    }
195
196    Ok(form_data)
197}