amagi 0.1.3

Rust SDK, CLI, and Web API service skeleton for multi-platform social web adapters.
Documentation
use std::{borrow::Cow, collections::BTreeMap};

use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
use serde::de::DeserializeOwned;
use serde_json::{Map, Value};

use crate::error::AppError;

use super::super::types::DouyinSearchType;

pub(super) fn decode_douyin_payload<T>(value: Value) -> Result<T, AppError>
where
    T: DeserializeOwned,
{
    serde_json::from_value(inject_upstream_payload(value)).map_err(AppError::from)
}

/// Converts a name/value string map into a `HeaderMap`.
///
/// # Errors
/// Returns `AppError::InvalidRequestConfig` for the first entry whose name or
/// value is rejected by `HeaderName::from_bytes` / `HeaderValue::from_str`.
pub(super) fn header_map_from_headers(
    headers: &BTreeMap<String, String>,
) -> Result<HeaderMap, AppError> {
    let mut converted = HeaderMap::new();

    for (name, value) in headers.iter() {
        let Ok(parsed_name) = HeaderName::from_bytes(name.as_bytes()) else {
            return Err(AppError::InvalidRequestConfig(format!(
                "invalid header name `{name}`"
            )));
        };
        let Ok(parsed_value) = HeaderValue::from_str(value) else {
            return Err(AppError::InvalidRequestConfig(format!(
                "invalid header value for `{name}`"
            )));
        };
        converted.insert(parsed_name, parsed_value);
    }

    Ok(converted)
}

/// Parses a response body into JSON and validates it as a Douyin response.
///
/// The trimmed body is first parsed directly. If that fails and the body does
/// not look like HTML, a JSON document wrapped in a supported prefix/suffix
/// (see `extract_wrapped_json_payload`) is tried next.
///
/// # Errors
/// Returns `AppError::UpstreamResponse` for an empty body, an HTML body, or a
/// payload rejected by `validate_douyin_response`. Otherwise returns the
/// converted `serde_json` error: the one from the wrapped candidate when a
/// wrapper was found, else the one from the direct parse.
pub(super) fn parse_json_payload(url: &str, body: &str) -> Result<Value, AppError> {
    let payload = body.trim();
    if payload.is_empty() {
        return Err(AppError::UpstreamResponse {
            status: None,
            message: format!("douyin request to {url} returned an empty body"),
        });
    }

    // Shared acceptance step: a parsed value is only returned once validated.
    let accept = |parsed: Value| -> Result<Value, AppError> {
        validate_douyin_response(url, &parsed)?;
        Ok(parsed)
    };

    let direct_parse_error = match serde_json::from_str::<Value>(payload) {
        Ok(parsed) => return accept(parsed),
        Err(error) => error,
    };

    // HTML is reported explicitly before any wrapper extraction is attempted.
    if looks_like_html(payload) {
        return Err(AppError::UpstreamResponse {
            status: None,
            message: format!(
                "douyin request to {url} returned HTML instead of JSON: {}",
                html_preview(payload)
            ),
        });
    }

    match extract_wrapped_json_payload(payload) {
        Some(candidate) => accept(serde_json::from_str::<Value>(candidate)?),
        // No usable wrapper: surface the error from the direct parse.
        None => Err(direct_parse_error.into()),
    }
}

/// Checks a decoded response for failure markers reported in the payload.
///
/// # Errors
/// Returns `AppError::UpstreamResponse` when `filter_detail.filter_reason` is a
/// non-empty string, or when `status_code` is an integer other than zero. In
/// the latter case the message includes `status_msg`, or a fallback text when
/// that field is missing or not a string.
pub(super) fn validate_douyin_response(url: &str, value: &Value) -> Result<(), AppError> {
    // A missing or non-string reason is treated the same as an empty one.
    let filter_reason = value
        .get("filter_detail")
        .and_then(|detail| detail.get("filter_reason"))
        .and_then(Value::as_str)
        .unwrap_or_default();
    if !filter_reason.is_empty() {
        return Err(AppError::UpstreamResponse {
            status: None,
            message: format!("douyin request to {url} was filtered: {filter_reason}"),
        });
    }

    let status_code = match value.get("status_code").and_then(Value::as_i64) {
        None | Some(0) => return Ok(()),
        Some(code) => code,
    };

    let message = match value.get("status_msg").and_then(Value::as_str) {
        Some(text) => text,
        None => "unknown douyin error",
    };
    Err(AppError::UpstreamResponse {
        status: None,
        message: format!(
            "douyin request to {url} failed with status_code={status_code}: {message}"
        ),
    })
}

/// Interprets an optional JSON value as a "has more" flag.
///
/// Booleans are used as-is, numbers count as true when they are a non-zero
/// `i64`, and strings count as true unless they are empty, `"0"` or `"false"`.
/// Anything else (including `None`) is false.
pub(super) fn has_more_value(value: Option<&Value>) -> bool {
    let Some(raw) = value else {
        return false;
    };

    match raw {
        Value::Bool(flag) => *flag,
        Value::Number(number) => matches!(number.as_i64(), Some(count) if count != 0),
        Value::String(text) => !matches!(text.as_str(), "" | "0" | "false"),
        _ => false,
    }
}

/// Renders a cursor value as a string.
///
/// Strings are copied and numbers are formatted with their `Display`
/// representation; every other JSON type yields `None`.
pub(super) fn stringify_cursor(value: &Value) -> Option<String> {
    let rendered = match value {
        Value::String(text) => text.to_owned(),
        Value::Number(number) => number.to_string(),
        _ => return None,
    };
    Some(rendered)
}

/// Returns a copy of the array stored under `field`.
///
/// An empty vector is returned when the field is missing or is not an array.
pub(super) fn extract_array_field(value: &Value, field: &str) -> Vec<Value> {
    match value.get(field) {
        Some(Value::Array(items)) => items.clone(),
        _ => Vec::new(),
    }
}

/// Stores `items` as a JSON array under `field`, optionally capped in length.
///
/// When `truncate` is `Some(limit)`, only the first `limit` items are kept.
/// The write itself is delegated to `set_field`.
pub(super) fn set_array_field(
    value: &mut Value,
    field: &str,
    mut items: Vec<Value>,
    truncate: Option<usize>,
) {
    // Truncate the owned vector in place; no copy of the items is needed.
    if let Some(limit) = truncate {
        items.truncate(limit);
    }
    set_field(value, field, Value::Array(items));
}

/// Sets `field` to `inner` on a JSON value.
///
/// If `value` is not an object it is replaced by a new object that contains
/// only this field; otherwise the field is inserted into (or overwritten in)
/// the existing object.
pub(super) fn set_field(value: &mut Value, field: &str, inner: Value) {
    match value {
        Value::Object(object) => {
            object.insert(field.to_owned(), inner);
        }
        other => {
            let mut object = Map::new();
            object.insert(field.to_owned(), inner);
            *other = Value::Object(object);
        }
    }
}

/// Builds the search page URL for `query` and the given search type.
///
/// The query is percent-encoded with `encode_search_segment` and placed in the
/// URL path; user and video searches add a `type` query parameter.
pub(super) fn search_referer(query: &str, search_type: DouyinSearchType) -> String {
    let encoded_query = encode_search_segment(query);

    match search_type {
        DouyinSearchType::General => {
            format!("https://www.douyin.com/root/search/{encoded_query}")
        }
        DouyinSearchType::User => format!("https://www.douyin.com/search/{encoded_query}?type=user"),
        DouyinSearchType::Video => format!("https://www.douyin.com/search/{encoded_query}?type=video"),
    }
}

/// Percent-encodes a string for use as a URL path segment.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are kept as-is; every other
/// byte of the UTF-8 encoding is written as `%XX` with uppercase hex digits.
/// The input is returned borrowed when nothing needs encoding.
pub(super) fn encode_search_segment(value: &str) -> Cow<'_, str> {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    /// True for bytes that are copied to the output unchanged.
    fn is_unreserved(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~')
    }

    if value.bytes().all(is_unreserved) {
        return Cow::Borrowed(value);
    }

    // Build the result in a single buffer instead of allocating one temporary
    // string per input byte. The input length is a lower bound on the output.
    let mut encoded = String::with_capacity(value.len());
    for byte in value.bytes() {
        if is_unreserved(byte) {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
        }
    }
    Cow::Owned(encoded)
}

/// Extracts every parseable JSON object/array found back to back in `raw`.
///
/// The text is scanned for the next `{` or `[`, the balanced block starting
/// there is located with `find_balanced_json_end`, and the block is kept when
/// it parses as JSON. Blocks that fail to parse are skipped. Scanning stops at
/// the first opener whose block is not balanced.
pub(super) fn parse_douyin_multi_json(raw: &str) -> Vec<Value> {
    let mut parsed = Vec::new();
    let mut remaining = raw;

    while let Some(start) = remaining.find(['{', '[']) {
        let candidate = &remaining[start..];
        let Some(length) = find_balanced_json_end(candidate) else {
            break;
        };

        // `length` is a byte offset just past the closing bracket.
        let (block, tail) = candidate.split_at(length);
        if let Ok(value) = serde_json::from_str::<Value>(block) {
            parsed.push(value);
        }
        remaining = tail;
    }

    parsed
}

/// Keeps only the values that have the shape of a search response page.
///
/// A value is kept when `cursor` and `has_more` are both readable as `i64`
/// and `data` is an array.
pub(super) fn filter_search_responses(values: Vec<Value>) -> Vec<Value> {
    let mut kept = values;
    kept.retain(|candidate| {
        let has_integer = |key: &str| candidate.get(key).and_then(Value::as_i64).is_some();
        has_integer("cursor")
            && has_integer("has_more")
            && matches!(candidate.get("data"), Some(Value::Array(_)))
    });
    kept
}

/// Returns true when the body begins with `<`.
fn looks_like_html(body: &str) -> bool {
    matches!(body.chars().next(), Some('<'))
}

/// Builds a short one-line preview of a body for error messages.
///
/// The first twelve whitespace-separated words are joined with single spaces.
fn html_preview(body: &str) -> String {
    let mut preview = String::new();
    // Line breaks are whitespace too, so one whitespace split covers all lines.
    for word in body.split_whitespace().take(12) {
        if !preview.is_empty() {
            preview.push(' ');
        }
        preview.push_str(word);
    }
    preview
}

/// Finds a JSON document embedded in a supported wrapper.
///
/// Returns the slice of the first balanced `{...}` / `[...]` block, but only
/// when the trimmed text before and after it passes
/// `is_supported_json_wrapper`. Returns `None` when there is no opener, the
/// block is not balanced, or the surrounding text is not a supported wrapper.
fn extract_wrapped_json_payload(body: &str) -> Option<&str> {
    let open = body.find(['{', '['])?;
    let (prefix, remainder) = body.split_at(open);
    let length = find_balanced_json_end(remainder)?;
    let (candidate, suffix) = remainder.split_at(length);

    is_supported_json_wrapper(prefix.trim(), suffix.trim()).then_some(candidate)
}

/// Returns true when both the text before and the text after a JSON block are
/// accepted by the prefix and suffix checks.
fn is_supported_json_wrapper(prefix: &str, suffix: &str) -> bool {
    if !is_supported_json_prefix(prefix) {
        return false;
    }
    is_supported_json_suffix(suffix)
}

/// Decides whether the text preceding a JSON block is an accepted wrapper.
///
/// Accepted prefixes are: nothing, one of the fixed loop guards
/// (`for(;;);`, `for(;;)`, `while(1);`, `while(1)`), or a callback call opener
/// such as `name(` whose name consists only of ASCII letters, digits, `_`, `$`
/// and `.`. Any prefix containing `<` is rejected.
fn is_supported_json_prefix(prefix: &str) -> bool {
    const LOOP_GUARDS: [&str; 4] = ["for(;;);", "for(;;)", "while(1);", "while(1)"];

    if prefix.is_empty() {
        return true;
    }
    if prefix.contains('<') {
        return false;
    }
    if LOOP_GUARDS.contains(&prefix) {
        return true;
    }

    // Remaining accepted form: `<callback>(`.
    let Some(before_paren) = prefix.strip_suffix('(') else {
        return false;
    };
    let callback = before_paren.trim();
    if callback.is_empty() {
        return false;
    }
    !callback
        .chars()
        .any(|ch| !(ch.is_ascii_alphanumeric() || matches!(ch, '_' | '$' | '.')))
}

/// Returns true when the text following a JSON block is empty or one of `;`,
/// `)` or `);`.
fn is_supported_json_suffix(suffix: &str) -> bool {
    const ACCEPTED: [&str; 4] = ["", ";", ")", ");"];
    ACCEPTED.contains(&suffix)
}

/// Locates the end of the first balanced bracket group in `raw`.
///
/// Returns the byte offset just past the `}` or `]` that closes the outermost
/// group. Brackets inside double-quoted strings are ignored, and a backslash
/// inside a string escapes the character that follows it. Returns `None` when
/// a closing bracket does not match the most recent opener or when the input
/// ends before the group is closed.
fn find_balanced_json_end(raw: &str) -> Option<usize> {
    let mut open_brackets: Vec<char> = Vec::new();
    let mut inside_string = false;
    let mut pending_escape = false;

    for (offset, current) in raw.char_indices() {
        if inside_string {
            if pending_escape {
                // The escaped character is consumed without interpretation.
                pending_escape = false;
            } else if current == '\\' {
                pending_escape = true;
            } else if current == '"' {
                inside_string = false;
            }
            continue;
        }

        // For closers, work out which opener must be on top of the stack.
        let expected_opener = match current {
            '"' => {
                inside_string = true;
                continue;
            }
            '{' | '[' => {
                open_brackets.push(current);
                continue;
            }
            '}' => '{',
            ']' => '[',
            _ => continue,
        };

        if open_brackets.pop() != Some(expected_opener) {
            return None;
        }
        if open_brackets.is_empty() {
            return Some(offset + current.len_utf8());
        }
    }

    None
}

/// Embeds a copy of a JSON object into itself under `upstream_payload`.
///
/// The copy is taken before insertion, so it holds the object as it was
/// received. Non-object values are returned unchanged.
fn inject_upstream_payload(mut value: Value) -> Value {
    if let Some(fields) = value.as_object_mut() {
        let snapshot = Value::Object(fields.clone());
        fields.insert("upstream_payload".to_owned(), snapshot);
    }
    value
}