shindan_maker/
html_utils.rs1use anyhow::{Context, Result};
2use scraper::{Html, Node};
3use serde_json::json;
4
5use crate::selectors::SELECTORS;
6
7#[cfg(feature = "segments")]
8use crate::segment::{Segment, Segments};
9
10#[cfg(feature = "html")]
11use {
12 crate::html_template::{APP_JS, CHART_JS, build_html},
13 anyhow::anyhow,
14 scraper::Element,
15};
16
/// Parses a result page and converts the first element matched by
/// `SELECTORS.post_display` into [`Segments`].
///
/// Only the direct children of that element are inspected, in document order:
/// - a text node becomes a `"text"` segment carrying the node's text,
/// - a `<br>` element becomes a `"text"` segment containing `"\n"`,
/// - an `<img>` element becomes an `"image"` segment whose `"file"` is the
///   value of its `data-src` attribute,
/// - every other node is ignored.
///
/// # Errors
///
/// Returns an error when no element matches `SELECTORS.post_display`, or when
/// an `<img>` child has no `data-src` attribute (instead of panicking).
#[cfg(feature = "segments")]
pub(crate) fn get_segments(response_text: &str) -> Result<Segments> {
    let result_document = Html::parse_document(response_text);

    let post_display = result_document
        .select(&SELECTORS.post_display)
        .next()
        .context("Failed to get the next element")?;

    let mut segments = Vec::new();

    // A plain loop (rather than `for_each`) lets `?` propagate errors to the caller.
    for child in post_display.children() {
        match child.value() {
            Node::Text(text) => {
                // NOTE(review): the original pattern was an invisible whitespace
                // literal, assumed here to be a non-breaking space (U+00A0) and
                // spelled as an escape so it is visible in source — confirm.
                let text = text.replace("\u{a0}", " ");
                segments.push(Segment::new("text", json!({ "text": text })));
            }
            Node::Element(element) => match element.name() {
                "br" => {
                    segments.push(Segment::new("text", json!({ "text": "\n" })));
                }
                "img" => {
                    let image_url = element
                        .attr("data-src")
                        .context("Failed to get 'data-src' attribute")?;
                    segments.push(Segment::new("image", json!({ "file": image_url })));
                }
                _ => {}
            },
            _ => {}
        }
    }

    Ok(Segments(segments))
}
68
/// Builds a standalone HTML page for a result.
///
/// The first element matched by `SELECTORS.title_and_result` is serialized and
/// spliced into the template returned by `build_html()`, together with a
/// `<base>` tag pointing at `base_url`. For every element matched by one of
/// `SELECTORS.effects` that is immediately followed by a `<noscript>` sibling,
/// the element's markup is removed from the fragment and the `<noscript>`
/// wrapper is replaced by its inner HTML.
///
/// When `response_text` contains `"chart.js"`, the `<!-- SCRIPTS -->`
/// placeholder is replaced by `APP_JS`, `CHART_JS` and the first script whose
/// markup contains `id`.
///
/// # Errors
///
/// Returns an error when no element matches `SELECTORS.title_and_result`, or
/// when the chart scripts are needed but no script containing `id` exists.
#[cfg(feature = "html")]
pub(crate) fn get_html_str(id: &str, response_text: &str, base_url: &str) -> Result<String> {
    let document = Html::parse_document(response_text);

    let mut fragment = document
        .select(&SELECTORS.title_and_result)
        .next()
        .context("Failed to get the next element")?
        .html();

    for selector in &SELECTORS.effects {
        for effect in document.select(selector) {
            let Some(sibling) = effect.next_sibling_element() else {
                continue;
            };
            if sibling.value().name() != "noscript" {
                continue;
            }

            // Drop the effect element, then unwrap the <noscript> fallback.
            let fallback = sibling.inner_html();
            fragment = fragment
                .replace(&effect.html(), "")
                .replace(&sibling.html(), &fallback);
        }
    }

    let base_tag = format!(r#"<base href="{}">"#, base_url);
    let page = build_html()
        .replace("<!-- TITLE_AND_RESULT -->", &fragment)
        .replace("<!-- BASE_URL -->", &base_tag);

    // Pages without charts need no scripts; the placeholder is left untouched.
    if !response_text.contains("chart.js") {
        return Ok(page);
    }

    let scripts = [
        format!(r#"<script>{}</script>"#, APP_JS),
        format!(r#"<script>{}</script>"#, CHART_JS),
        get_first_script(&document, id)?,
    ];

    Ok(page.replace("<!-- SCRIPTS -->", &scripts.join("\n")))
}
113
/// Returns the serialized HTML of the first element matched by
/// `SELECTORS.script` whose markup contains `id`.
///
/// # Errors
///
/// Returns an error when no matched element's markup contains `id`.
#[cfg(feature = "html")]
pub(crate) fn get_first_script(result_document: &Html, id: &str) -> Result<String> {
    result_document
        .select(&SELECTORS.script)
        .map(|script| script.html())
        .find(|markup| markup.contains(id))
        .ok_or_else(|| anyhow!("Failed to find script with id {}", id))
}
125
126pub(crate) fn extract_title_and_form_data(
127 html_content: &str,
128 name: &str,
129) -> Result<(String, Vec<(String, String)>)> {
130 let document = Html::parse_document(html_content);
131 let title = extract_title(&document)?;
132 let form_data = extract_form_data(&document, name)?;
133
134 Ok((title, form_data))
135}
136
137pub(crate) fn extract_title(dom: &Html) -> Result<String> {
138 Ok(dom
139 .select(&SELECTORS.shindan_title)
140 .next()
141 .context("Failed to get the next element")?
142 .value()
143 .attr("data-shindan_title")
144 .context("Failed to get 'data-shindan_title' attribute")?
145 .to_string())
146}
147
148pub(crate) fn extract_description(dom: &Html) -> Result<String> {
149 let mut desc = Vec::new();
150
151 dom.select(&SELECTORS.shindan_description_display)
152 .next()
153 .context("Failed to get the next element")?
154 .children()
155 .for_each(|child| {
156 let node = child.value();
157 match node {
158 Node::Text(text) => {
159 desc.push(text.to_string());
160 }
161 Node::Element(element) => {
162 if element.name() == "br" {
163 desc.push("\n".to_string());
164 } else if let Some(node) = child.children().next()
165 && let Node::Text(text) = node.value()
166 {
167 desc.push(text.to_string());
168 };
169 }
170 _ => {}
171 }
172 });
173
174 Ok(desc.join(""))
175}
176
177pub(crate) fn extract_form_data(dom: &Html, name: &str) -> Result<Vec<(String, String)>> {
178 const FIELDS: &[&str] = &["_token", "randname", "type"];
179 let mut form_data = Vec::with_capacity(FIELDS.len() + 1);
180
181 for (index, &field) in FIELDS.iter().enumerate() {
182 let value = dom
183 .select(&SELECTORS.form[index])
184 .next()
185 .and_then(|element| element.value().attr("value"))
186 .unwrap_or("")
187 .to_string();
188
189 form_data.push((field.to_string(), value));
190 }
191
192 form_data.push(("user_input_value_1".to_string(), name.to_string()));
193
194 for element in dom.select(&SELECTORS.input_parts) {
195 if let Some(input_name) = element.value().attr("name") {
196 form_data.push((input_name.to_string(), name.to_string()));
197 }
198 }
199
200 Ok(form_data)
201}