parse_book_source/analyzer/
analyzer_manager.rs1use std::{collections::HashMap, sync::LazyLock};
2
3use super::{Analyzer, AnalyzerType, Analyzers, SingleRule, json::value_to_string};
4use crate::{Result, utils::replace_all};
5use anyhow::anyhow;
6use regex::Regex;
7use serde_json::Value;
8
9static SPLIT_RULE: LazyLock<Regex> = LazyLock::new(|| {
10 Regex::new(
11 r"@css:|@json:|@http:|@xpath:|@match:|@regex:|@regexp:|@replace:|@encode:|@decode:|^",
12 )
13 .unwrap()
14});
15static EXPRESSION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\{(.+?)\}\}").unwrap());
16static PUT_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"@put:\{(.+?):(.+?)\}").unwrap());
17static GET_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"@get:\{(.+?)\}").unwrap());
18
19#[derive(Debug, Clone)]
20pub struct AnalyzerManager {
21 pub analyzers: Vec<Analyzers>,
22 pub variables: HashMap<String, String>,
23}
24
25impl AnalyzerManager {
26 pub fn new() -> Result<Self> {
27 Ok(Self {
28 analyzers: vec![
29 Analyzers::new(r"^@css:", None, AnalyzerType::Html)?,
30 Analyzers::new(r"^@json:|^\$", Some(r"^@json:"), AnalyzerType::JsonPath)?,
31 Analyzers::new("", None, AnalyzerType::Default)?,
32 ],
33 variables: HashMap::new(),
34 })
35 }
36
37 pub fn set<T: ToString>(&mut self, key: &str, value: T) {
38 self.variables.insert(key.to_string(), value.to_string());
39 }
40
41 pub fn get_analyzer(&self, rule: &str) -> &Analyzers {
42 self.analyzers
43 .iter()
44 .find(|a| a.pattern.is_match(rule.trim()))
45 .unwrap()
46 }
47
48 pub fn split_rule_resolve(&self, rule: &str) -> Result<Vec<SingleRule>> {
49 let rule_match = SPLIT_RULE.find_iter(rule).collect::<Vec<_>>();
50 let mut rule_list: Vec<SingleRule> = vec![];
51 let mut end = rule.len();
52
53 for i in rule_match.iter().rev() {
54 let mut r = rule[i.start()..end].to_string();
55 end = i.start();
56
57 let analyzer = self.get_analyzer(&r);
58
59 r = analyzer
60 .replace
61 .as_ref()
62 .unwrap_or(&analyzer.pattern)
63 .replace(&r, "")
64 .to_string();
65
66 if let Some(index) = r.find("##") {
67 let (r, replace) = r.split_at(index);
69
70 rule_list.push(SingleRule::new(
71 r,
72 Some(&replace[2..]),
74 analyzer.analyzer.clone(),
75 )?);
76 } else {
77 rule_list.push(SingleRule::new(&r, None, analyzer.analyzer.clone())?);
78 }
79 }
80
81 rule_list.reverse();
82 Ok(rule_list)
83 }
84
85 fn _get_elements(analyzer: &dyn Analyzer, rule: &str) -> Result<Vec<String>> {
86 if rule.contains("&&") {
87 let mut res = vec![];
88 for simple_rule in rule.split("&&") {
89 let mut r = Self::_get_elements(analyzer, simple_rule)?;
90
91 if !r.is_empty() {
92 res.append(&mut r);
93 }
94 }
95 return Ok(res);
96 } else if rule.contains("||") {
97 for simple_rule in rule.split("||") {
98 let r = Self::_get_elements(analyzer, simple_rule)?;
99
100 if !r.is_empty() {
101 return Ok(r);
102 };
103 }
104 }
105 analyzer.get_elements(rule)
106 }
107
108 pub fn get_element(&self, rule: &str, data: &str) -> Result<Vec<String>> {
109 let mut temp = data.to_string();
110
111 for single_rule in self.split_rule_resolve(rule)? {
112 let analyzer = single_rule.analyzer.parse_to_analyzer(&temp)?;
113 temp = Self::_get_elements(analyzer.as_ref(), &single_rule.rule)?
114 .join("_______split_______");
115 }
116
117 Ok(temp
118 .split("_______split_______")
119 .map(|s| s.to_string())
120 .collect())
121 }
122
123 fn _get_string(
124 single_rule: &SingleRule,
125 analyzer: &dyn Analyzer,
126 rule: &str,
127 ) -> Result<String> {
128 let mut result = String::new();
129
130 if rule.contains("&&") {
131 let mut res = vec![];
132 for simple_rule in rule.split("&&") {
133 let r = Self::_get_string(single_rule, analyzer, simple_rule)?;
134
135 if !r.is_empty() {
136 res.push(r);
137 }
138 }
139
140 return Ok(res.join(" "));
141 } else if rule.contains("||") {
142 for simple_rule in rule.split("||") {
143 let r = Self::_get_string(single_rule, analyzer, simple_rule)?;
144
145 if !r.is_empty() {
146 return Ok(r);
147 };
148 }
149 } else {
150 result = analyzer.get_string(rule)?.trim().to_string()
151 }
152
153 if result.is_empty() {
154 Ok(result)
155 } else {
156 Ok(single_rule.replace_content(&result)?)
157 }
158 }
159
160 fn put_variable(&mut self, rule: &str, data: &str) -> Result<String> {
161 replace_all(&PUT_RULE, rule, |capture| {
162 let key = capture
163 .get(1)
164 .ok_or(anyhow!("key is not found"))?
165 .as_str()
166 .trim();
167
168 let sub_rule = capture
169 .get(2)
170 .ok_or(anyhow!("value rule is not found"))?
171 .as_str()
172 .trim();
173
174 let v = self.get_string(sub_rule, data, None)?;
175 self.variables.insert(key.to_string(), v);
176 Ok("".into())
177 })
178 }
179
180 fn get_variable(&self, rule: &str) -> Result<String> {
181 replace_all(&GET_RULE, rule, |capture| {
182 let key = capture
183 .get(1)
184 .ok_or(anyhow!("key is not found"))?
185 .as_str()
186 .trim();
187
188 let v = self
189 .variables
190 .get(key)
191 .ok_or(anyhow!("the value of key {} is not found", key))?;
192
193 Ok(v.to_string())
194 })
195 }
196
197 pub fn get_string(&mut self, rule: &str, data: &str, extra: Option<Value>) -> Result<String> {
198 if rule.is_empty() {
199 return Ok("".to_string());
200 }
201
202 let new_rule = self.put_variable(rule, data)?;
204
205 let new_rule = self.get_variable(&new_rule)?;
207
208 let p_left = new_rule.rfind("{{");
210 let p_right = new_rule.rfind("}}");
211
212 if let Some(left) = p_left
213 && let Some(right) = p_right
214 && left < right
215 {
216 return replace_all(&EXPRESSION, &new_rule, |captures| {
217 let sub_rule = captures.get(1).map(|m| m.as_str().trim()).unwrap_or("");
218 if extra.is_some()
219 && let Some(extra_value) = extra.as_ref().unwrap().get(sub_rule)
220 {
221 return value_to_string(extra_value);
222 }
223 self.get_string(sub_rule, data, None)
224 });
225 }
226
227 let mut temp = data.to_string();
229 for single_rule in self.split_rule_resolve(&new_rule)? {
230 let analyzer = single_rule.analyzer.parse_to_analyzer(&temp)?;
231
232 temp = Self::_get_string(&single_rule, analyzer.as_ref(), &single_rule.rule)?;
233 temp = single_rule.replace_content(&temp)?;
234 }
235 Ok(temp)
236 }
237}
238
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A URL template mixing `@get:{...}` and `{{...}}` is fully resolved from
    /// stored variables and the JSON payload.
    #[test]
    fn test_analyzer_manager() {
        let data = "{\"buymessagevalue\":\"15_15\",\"chapter_id\":300,\"chapter_name\":\"第四卷 剑气近_第二百九十五章 远望\",\"chapter_size\":3253,\"coin\":15,\"coin_original\":15,\"createdate\":\"2023-04-27 23:19:25\",\"license\":1,\"money\":0.15,\"novel_bkid_crid\":\"novel_672340121_300\",\"ori_license\":1,\"txt_url\":\"\",\"zip_url\":\"\"}";
        let template = "https://www.xmkanshu.com/service/getContent?fr=smsstg&v=4&uid=B197589CF54DC527538FADCAE6BDBC78&urbid=%2Fbook_95_0&bkid=@get:{book}&crid={{$.chapter_id}}&pg=1";
        let expected = "https://www.xmkanshu.com/service/getContent?fr=smsstg&v=4&uid=B197589CF54DC527538FADCAE6BDBC78&urbid=%2Fbook_95_0&bkid=123&crid=300&pg=1";

        let mut manager = AnalyzerManager::new().unwrap();
        manager.set("book", 123);
        manager.set("index", 1);

        let extra = json!({
            "page":123
        });
        let resolved = manager.get_string(template, data, Some(extra));

        assert_eq!(resolved.unwrap(), expected);
    }

    /// Each rule prefix is dispatched to the expected analyzer type.
    #[test]
    fn test_analyzer_manager_get_analyzer() {
        let manager = AnalyzerManager::new().unwrap();

        let cases = [
            (
                "[property=og:novel:latest_chapter_name]@content",
                AnalyzerType::Default,
            ),
            ("$.book.id##4##abc", AnalyzerType::JsonPath),
            ("@css:div h1 a[href]", AnalyzerType::Html),
        ];

        for (rule, expected) in cases {
            let selected = manager.get_analyzer(rule);
            assert_eq!(selected.analyzer, expected);
        }
    }
}