Skip to main content

rustybook_extractor/
payload.rs

1use rustybook_errors::ExtractionError;
2use scraper::{Html, Selector};
3use serde::{Deserialize, Deserializer, Serialize};
4use serde_json::Value;
5use tracing::trace;
6
7#[derive(Debug, Serialize, Deserialize)]
8pub struct RuntimePayload {
9    #[serde(
10        default,
11        rename(deserialize = "require"),
12        deserialize_with = "extract_module"
13    )]
14    pub module: Option<ModuleInvocation>,
15}
16
17#[derive(Debug, Serialize, Deserialize)]
18pub struct ModuleInvocation {
19    pub module_name: String,
20    pub method_name: Option<String>,
21    pub meta: Option<Value>,
22    pub body: Option<Value>,
23}
24
25pub fn extract_modules(text: &str) -> Result<Vec<ModuleInvocation>, ExtractionError> {
26    let doc = Html::parse_document(text);
27    let sel = Selector::parse(r#"script[type="application/json"][data-content-len][data-sjs]"#)
28        .map_err(|e| ExtractionError::Selector(e.to_string()))?;
29
30    let modules: Vec<ModuleInvocation> = doc
31        .select(&sel)
32        .map(|t| serde_json::from_str(&t.inner_html()))
33        .collect::<Result<Vec<RuntimePayload>, _>>()?
34        .into_iter()
35        .filter_map(|p| p.module)
36        .collect();
37
38    trace!("extracted {} modules", modules.len());
39
40    Ok(modules)
41}
42
43fn extract_module<'de, D>(deserializer: D) -> Result<Option<ModuleInvocation>, D::Error>
44where
45    D: Deserializer<'de>,
46{
47    let vec = Option::<Vec<ModuleInvocation>>::deserialize(deserializer)?;
48
49    Ok(vec.and_then(|mut v| v.pop()))
50}