shindan_maker/
html_utils.rs1use anyhow::{Context, Result};
2use scraper::{Html, Node};
3use serde_json::json;
4
5use crate::selectors::SELECTORS;
6
7#[cfg(feature = "segments")]
8use crate::segment::{Segment, Segments};
9
10#[cfg(feature = "html")]
11use {crate::html_template::HTML_TEMPLATE, anyhow::anyhow, scraper::Element};
12
/// Parses a result page and converts the direct children of the first
/// `SELECTORS.post_display` match into a list of segments.
///
/// Children are visited in document order:
/// * a text node becomes a `"text"` segment carrying its content,
/// * a `<br>` element becomes a `"text"` segment carrying `"\n"`,
/// * an `<img>` element becomes an `"image"` segment whose `file` field is
///   the value of its `data-src` attribute,
/// * any other node is skipped.
///
/// # Errors
///
/// Returns an error when nothing matches `SELECTORS.post_display`, or when an
/// `<img>` child has no `data-src` attribute (reported as an error rather
/// than a panic, so a malformed page cannot abort the caller).
#[cfg(feature = "segments")]
pub(crate) fn get_segments(response_text: &str) -> Result<Segments> {
    let result_document = Html::parse_document(response_text);

    let post_display = result_document
        .select(&SELECTORS.post_display)
        .next()
        .context("Failed to get the next element")?;

    let mut segments = Vec::new();

    for child in post_display.children() {
        match child.value() {
            Node::Text(text) => {
                // Normalise non-breaking spaces to plain spaces. The character
                // is spelled as an escape so it cannot be confused with (or
                // silently turned into) an ordinary space.
                // NOTE(review): confirm U+00A0 is the character intended here.
                let text = text.replace("\u{a0}", " ");
                segments.push(Segment::new("text", json!({ "text": text })));
            }
            Node::Element(element) => match element.name() {
                "br" => {
                    let text = "\n".to_string();
                    segments.push(Segment::new("text", json!({ "text": text })));
                }
                "img" => {
                    let image_url = element
                        .attr("data-src")
                        .context("Failed to get 'data-src' attribute")?
                        .to_string();
                    segments.push(Segment::new("image", json!({ "file": image_url })));
                }
                // Other elements contribute nothing to the segment list.
                _ => {}
            },
            _ => {}
        }
    }

    Ok(Segments(segments))
}
64
/// Renders a result page into a standalone HTML document based on
/// `HTML_TEMPLATE`.
///
/// The markup of the first `SELECTORS.title_and_result` match is taken as the
/// page fragment. For every element matched by one of `SELECTORS.effects`
/// whose next sibling element is a `<noscript>`, the effect element's markup
/// is removed from the fragment and the `<noscript>` markup is replaced by
/// its inner HTML. The fragment and a `<base href="…">` tag built from
/// `base_url` are then substituted into the template placeholders.
///
/// When `response_text` contains the substring `chart.js`, the
/// `<!-- SCRIPTS -->` placeholder is additionally replaced by two fixed
/// script tags followed by the script located via [`get_first_script`].
///
/// # Errors
///
/// Returns an error when nothing matches `SELECTORS.title_and_result`, or
/// when the `chart.js` branch is taken and no script containing `id` exists.
#[cfg(feature = "html")]
pub(crate) fn get_html_str(id: &str, response_text: &str, base_url: &str) -> Result<String> {
    let document = Html::parse_document(response_text);

    let mut fragment = document
        .select(&SELECTORS.title_and_result)
        .next()
        .context("Failed to get the next element")?
        .html();

    for selector in &SELECTORS.effects {
        for effect in document.select(selector) {
            let Some(sibling) = effect.next_sibling_element() else {
                continue;
            };
            if sibling.value().name() != "noscript" {
                continue;
            }

            // Drop the effect element and unwrap the <noscript> fallback
            // that immediately follows it.
            let fallback = sibling.inner_html();
            let effect_markup = effect.html();
            let noscript_markup = sibling.html();
            fragment = fragment
                .replace(&effect_markup, "")
                .replace(&noscript_markup, &fallback);
        }
    }

    let base_tag = format!(r#"<base href="{}">"#, base_url);
    let page = HTML_TEMPLATE
        .replace("<!-- TITLE_AND_RESULT -->", &fragment)
        .replace("<!-- BASE_URL -->", &base_tag);

    if !response_text.contains("chart.js") {
        return Ok(page);
    }

    let shindan_script = get_first_script(&document, id)?;
    let scripts = [
        r#"<script src="/js/app.js" defer=""></script>"#,
        r#"<script src="/js/chart.js" defer=""></script>"#,
        shindan_script.as_str(),
    ]
    .join("\n");

    Ok(page.replace("<!-- SCRIPTS -->", &scripts))
}
109
/// Returns the serialized HTML of the first element matched by
/// `SELECTORS.script` whose markup contains `id` as a substring.
///
/// # Errors
///
/// Returns an error naming `id` when no matched element contains it.
#[cfg(feature = "html")]
pub(crate) fn get_first_script(result_document: &Html, id: &str) -> Result<String> {
    result_document
        .select(&SELECTORS.script)
        .map(|candidate| candidate.html())
        .find(|markup| markup.contains(id))
        .ok_or_else(|| anyhow!("Failed to find script with id {}", id))
}
121
122pub(crate) fn extract_title_and_form_data(
123 html_content: &str,
124 name: &str,
125) -> Result<(String, Vec<(String, String)>)> {
126 let document = Html::parse_document(html_content);
127 let title = extract_title(&document)?;
128 let form_data = extract_form_data(&document, name)?;
129
130 Ok((title, form_data))
131}
132
133pub(crate) fn extract_title(dom: &Html) -> Result<String> {
134 Ok(dom
135 .select(&SELECTORS.shindan_title)
136 .next()
137 .context("Failed to get the next element")?
138 .value()
139 .attr("data-shindan_title")
140 .context("Failed to get 'data-shindan_title' attribute")?
141 .to_string())
142}
143
144pub(crate) fn extract_description(dom: &Html) -> Result<String> {
145 let mut desc = Vec::new();
146
147 dom.select(&SELECTORS.shindan_description_display)
148 .next()
149 .context("Failed to get the next element")?
150 .children()
151 .for_each(|child| {
152 let node = child.value();
153 match node {
154 Node::Text(text) => {
155 desc.push(text.to_string());
156 }
157 Node::Element(element) => {
158 if element.name() == "br" {
159 desc.push("\n".to_string());
160 } else if let Some(node) = child.children().next()
161 && let Node::Text(text) = node.value()
162 {
163 desc.push(text.to_string());
164 };
165 }
166 _ => {}
167 }
168 });
169
170 Ok(desc.join(""))
171}
172
173pub(crate) fn extract_form_data(dom: &Html, name: &str) -> Result<Vec<(String, String)>> {
174 const FIELDS: &[&str] = &["_token", "randname", "type"];
175 let mut form_data = Vec::with_capacity(FIELDS.len() + 1);
176
177 for (index, &field) in FIELDS.iter().enumerate() {
178 let value = dom
179 .select(&SELECTORS.form[index])
180 .next()
181 .and_then(|element| element.value().attr("value"))
182 .unwrap_or("")
183 .to_string();
184
185 form_data.push((field.to_string(), value));
186 }
187
188 form_data.push(("user_input_value_1".to_string(), name.to_string()));
189
190 for element in dom.select(&SELECTORS.input_parts) {
191 if let Some(input_name) = element.value().attr("name") {
192 form_data.push((input_name.to_string(), name.to_string()));
193 }
194 }
195
196 Ok(form_data)
197}