shindan_maker/
html_utils.rs1use serde_json::json;
2use scraper::{Html, Node};
3use anyhow::{Context, Result};
4
5use crate::selectors::SELECTORS;
6
7#[cfg(feature = "segments")]
8use crate::segment::{Segment, Segments};
9
10#[cfg(feature = "html")]
11use {
12 anyhow::anyhow,
13 scraper::Element,
14 crate::html_template::HTML_TEMPLATE,
15};
16
17#[cfg(feature = "segments")]
18pub(crate) fn get_segments(response_text: &str) -> Result<Segments> {
19 let result_document = Html::parse_document(response_text);
20
21 let mut segments = Vec::new();
22
23 result_document.select(&SELECTORS.post_display)
24 .next()
25 .context("Failed to get the next element")?
26 .children()
27 .for_each(|child| {
28 let node = child.value();
29 match node {
30 Node::Text(text) => {
31 let text = text.replace(" ", " ");
32 segments.push(Segment::new("text", json!({
33 "text": text
34 })));
35 }
36 Node::Element(element) => {
37 if element.name() == "br" {
38 let text = "\n".to_string();
39 segments.push(Segment::new("text", json!({
40 "text": text
41 })));
42 } else if element.name() == "img" {
43 let image_url = element.attr("data-src").expect("Failed to get 'data-src' attribute").to_string();
44 segments.push(Segment::new("image", json!({
45 "file": image_url
46 })));
47 }
48 }
49 _ => {}
50 }
51 });
52
53 Ok(Segments(segments))
54}
55
56#[cfg(feature = "html")]
57pub(crate) fn get_html_str(id: &str, response_text: &str) -> Result<String> {
58 let result_document = Html::parse_document(response_text);
59
60 let mut title_and_result = result_document
61 .select(&SELECTORS.title_and_result)
62 .next()
63 .context("Failed to get the next element")?
64 .html();
65
66 for effects_selector in &SELECTORS.effects {
67 let effects = result_document.select(effects_selector);
68 for effect in effects {
69 if let Some(next_el) = effect.next_sibling_element() {
70 if next_el.value().name() == "noscript" {
71 let content = next_el.inner_html();
72
73 title_and_result = title_and_result.replace(&effect.html(), "")
74 .replace(&next_el.html(), &content);
75 }
76 }
77 }
78 }
79
80 let mut html = HTML_TEMPLATE
81 .replace("<!-- TITLE_AND_RESULT -->", &title_and_result);
82
83 if response_text.contains("chart.js") {
84 let mut scripts = vec![
85 r#"<script src="https://cn.shindanmaker.com/js/app.js?id=163959a7e23bfa7264a0ddefb3c36f13" defer=""></script>"#,
86 r#"<script src="https://cn.shindanmaker.com/js/chart.js?id=391e335afc72362acd6bf1ea1ba6b74c" defer=""></script>"#];
87
88 let shindan_script = get_first_script(&result_document, id)?;
89 scripts.push(&shindan_script);
90 html = html.replace("<!-- SCRIPTS -->", &scripts.join("\n"));
91 }
92 Ok(html)
93}
94
95#[cfg(feature = "html")]
96pub(crate) fn get_first_script(result_document: &Html, id: &str) -> Result<String> {
97 for element in result_document.select(&SELECTORS.script) {
98 let html = element.html();
99 if html.contains(id) {
100 return Ok(html);
101 }
102 }
103
104 Err(anyhow!("Failed to find script with id {}", id))
105}
106
107pub(crate) fn extract_title_and_form_data(html_content: &str, name: &str) -> Result<(String, Vec<(&'static str, String)>)> {
108 let document = Html::parse_document(html_content);
109 let title = extract_title(&document)?;
110 let form_data = extract_form_data(&document, name)?;
111
112 Ok((title, form_data))
113}
114
115pub(crate) fn extract_title(dom: &Html) -> Result<String> {
116 Ok(dom
117 .select(&SELECTORS.shindan_title)
118 .next()
119 .context("Failed to get the next element")?
120 .value().attr("data-shindan_title")
121 .context("Failed to get 'data-shindan_title' attribute")?
122 .to_string())
123}
124
125pub(crate) fn extract_description(dom: &Html) -> Result<String> {
126 let mut desc = Vec::new();
127
128 dom
129 .select(&SELECTORS.shindan_description_display)
130 .next()
131 .context("Failed to get the next element")?
132 .children()
133 .for_each(|child| {
134 let node = child.value();
135 match node {
136 Node::Text(text) => {
137 desc.push(text.to_string());
138 }
139 Node::Element(element) => {
140 if element.name() == "br" {
141 desc.push("\n".to_string());
142 } else if let Some(node) = child.children().next() {
143 if let Node::Text(text) = node.value() {
144 desc.push(text.to_string());
145 };
146 }
147 }
148 _ => {}
149 }
150 });
151
152 Ok(desc.join(""))
153}
154
155pub(crate) fn extract_form_data(
156 dom: &Html,
157 name: &str,
158) -> Result<Vec<(&'static str, String)>> {
159 const FIELDS: &[&str] = &["_token", "randname", "type"];
160 let mut form_data = Vec::with_capacity(FIELDS.len() + 1);
161
162 for (index, &field) in FIELDS.iter().enumerate() {
163 let value = dom
164 .select(&SELECTORS.form[index])
165 .next()
166 .context("Failed to get the next element")?
167 .value()
168 .attr("value")
169 .context("Failed to get value attribute")?;
170
171 form_data.push((field, value.to_string()));
172 }
173
174 form_data.push(("user_input_value_1", name.to_string()));
175
176 Ok(form_data)
177}