rustybook_extractor/
payload.rs1use rustybook_errors::ExtractionError;
2use scraper::{Html, Selector};
3use serde::{Deserialize, Deserializer, Serialize};
4use serde_json::Value;
5use tracing::trace;
6
7#[derive(Debug, Serialize, Deserialize)]
8pub struct RuntimePayload {
9 #[serde(
10 default,
11 rename(deserialize = "require"),
12 deserialize_with = "extract_module"
13 )]
14 pub module: Option<ModuleInvocation>,
15}
16
17#[derive(Debug, Serialize, Deserialize)]
18pub struct ModuleInvocation {
19 pub module_name: String,
20 pub method_name: Option<String>,
21 pub meta: Option<Value>,
22 pub body: Option<Value>,
23}
24
25pub fn extract_modules(text: &str) -> Result<Vec<ModuleInvocation>, ExtractionError> {
26 let doc = Html::parse_document(text);
27 let sel = Selector::parse(r#"script[type="application/json"][data-content-len][data-sjs]"#)
28 .map_err(|e| ExtractionError::Selector(e.to_string()))?;
29
30 let modules: Vec<ModuleInvocation> = doc
31 .select(&sel)
32 .map(|t| serde_json::from_str(&t.inner_html()))
33 .collect::<Result<Vec<RuntimePayload>, _>>()?
34 .into_iter()
35 .filter_map(|p| p.module)
36 .collect();
37
38 trace!("extracted {} modules", modules.len());
39
40 Ok(modules)
41}
42
43fn extract_module<'de, D>(deserializer: D) -> Result<Option<ModuleInvocation>, D::Error>
44where
45 D: Deserializer<'de>,
46{
47 let vec = Option::<Vec<ModuleInvocation>>::deserialize(deserializer)?;
48
49 Ok(vec.and_then(|mut v| v.pop()))
50}