use async_trait::async_trait;
use serde_json::Value;
use synaptic_core::{Document, Loader, SynapticError};
use crate::{auth::TokenCache, LarkConfig};
/// Loader that walks the node tree of a Lark Wiki space and loads every
/// `doc` / `docx` node it finds as a [`Document`].
///
/// Built with [`LarkWikiLoader::new`] and configured through the `with_*`
/// builder methods; a space id must be set before `load` is called.
pub struct LarkWikiLoader {
/// Token cache obtained from the configuration; queried for the bearer
/// token at the start of every `load`.
token_cache: TokenCache,
/// API base URL copied from the configuration; endpoint paths are appended to it.
base_url: String,
/// HTTP client used for the wiki node listing requests.
client: reqwest::Client,
/// Identifier of the wiki space to traverse; `None` until `with_space_id` is called.
space_id: Option<String>,
/// Optional traversal depth limit (root level is depth 0); `None` means unlimited.
max_depth: Option<usize>,
/// Copy of the original configuration, handed to the document loader that
/// fetches the discovered documents.
config_snapshot: LarkConfig,
}
impl LarkWikiLoader {
    /// Creates a loader from `config` with no wiki space selected and no
    /// depth limit.
    ///
    /// A space id must be supplied with [`with_space_id`](Self::with_space_id)
    /// before the loader can be used.
    pub fn new(config: LarkConfig) -> Self {
        let base_url = config.base_url.clone();
        Self {
            token_cache: config.clone().token_cache(),
            base_url,
            client: reqwest::Client::new(),
            space_id: None,
            max_depth: None,
            config_snapshot: config,
        }
    }

    /// Sets the id of the wiki space whose nodes will be traversed.
    pub fn with_space_id(mut self, id: impl Into<String>) -> Self {
        self.space_id = Some(id.into());
        self
    }

    /// Limits how deep the node tree is traversed.
    ///
    /// The root level is depth `0`; levels deeper than `d` are not listed, so
    /// `with_max_depth(0)` collects only the top-level nodes.
    pub fn with_max_depth(mut self, d: usize) -> Self {
        self.max_depth = Some(d);
        self
    }

    /// Returns the configured space id, or an empty string when none is set.
    pub fn space_id(&self) -> &str {
        self.space_id.as_deref().unwrap_or("")
    }

    /// Returns the configured depth limit, if any.
    pub fn max_depth(&self) -> Option<usize> {
        self.max_depth
    }

    /// Recursively collects the object tokens of all `doc` / `docx` nodes
    /// below `parent_node_token` (or below the space root when it is `None`).
    ///
    /// `token` is the bearer token sent with every request and `depth` is the
    /// level being listed (root = 0). Every page of a level is fetched by
    /// following the `has_more` / `page_token` fields of the response, so
    /// levels holding more than one page of nodes are not truncated.
    ///
    /// Items lacking an `obj_token` are skipped, and items flagged
    /// `has_child` but lacking a `node_token` are not descended into.
    ///
    /// # Errors
    ///
    /// * [`SynapticError::Config`] when no space id has been set.
    /// * [`SynapticError::Loader`] when the URL cannot be built, the request
    ///   or JSON decoding fails, or the API responds with a non-zero `code`.
    async fn collect_tokens(
        &self,
        token: &str,
        parent_node_token: Option<&str>,
        depth: usize,
    ) -> Result<Vec<String>, SynapticError> {
        if matches!(self.max_depth, Some(max) if depth > max) {
            return Ok(Vec::new());
        }
        // Report a missing space id as an error rather than panicking.
        let space_id = self.space_id.as_deref().ok_or_else(|| {
            SynapticError::Config("LarkWikiLoader: space_id not set".to_string())
        })?;
        let endpoint = format!("{}/wiki/v2/spaces/{}/nodes", self.base_url, space_id);

        let mut tokens = Vec::new();
        // Cursor for the next page; `None` requests the first page.
        let mut page_token: Option<String> = None;

        loop {
            // Build the URL through `Url` so every query value is percent-encoded.
            let url = {
                let mut params: Vec<(&str, &str)> = vec![("page_size", "50")];
                if let Some(parent) = parent_node_token {
                    params.push(("parent_node_token", parent));
                }
                if let Some(cursor) = page_token.as_deref() {
                    params.push(("page_token", cursor));
                }
                reqwest::Url::parse_with_params(&endpoint, &params)
                    .map_err(|e| SynapticError::Loader(format!("wiki nodes: {e}")))?
            };

            let resp = self
                .client
                .get(url)
                .bearer_auth(token)
                .send()
                .await
                .map_err(|e| SynapticError::Loader(format!("wiki nodes: {e}")))?;
            let body: Value = resp
                .json()
                .await
                .map_err(|e| SynapticError::Loader(format!("wiki nodes parse: {e}")))?;
            if body["code"].as_i64().unwrap_or(-1) != 0 {
                return Err(SynapticError::Loader(format!(
                    "Lark Wiki API error: {}",
                    body["msg"].as_str().unwrap_or("unknown")
                )));
            }

            let data = &body["data"];
            if let Some(items) = data["items"].as_array() {
                for item in items {
                    let is_document = matches!(item["obj_type"].as_str(), Some("doc" | "docx"));
                    if is_document {
                        // An empty token cannot be loaded later, so skip it.
                        if let Some(obj_token) =
                            item["obj_token"].as_str().filter(|t| !t.is_empty())
                        {
                            tokens.push(obj_token.to_owned());
                        }
                    }
                    if item["has_child"].as_bool().unwrap_or(false) {
                        // Without a node token the child listing would be sent with an
                        // empty parent, so skip the descent instead of risking a loop.
                        if let Some(node_token) =
                            item["node_token"].as_str().filter(|t| !t.is_empty())
                        {
                            let children = Box::pin(self.collect_tokens(
                                token,
                                Some(node_token),
                                depth + 1,
                            ))
                            .await?;
                            tokens.extend(children);
                        }
                    }
                }
            }

            let has_more = data["has_more"].as_bool().unwrap_or(false);
            let next_cursor = data["page_token"].as_str().filter(|c| !c.is_empty());
            match next_cursor {
                // Stop if the server repeats the cursor, to avoid spinning forever.
                Some(next) if has_more && page_token.as_deref() != Some(next) => {
                    page_token = Some(next.to_owned());
                }
                _ => break,
            }
        }
        Ok(tokens)
    }
}
#[async_trait]
impl Loader for LarkWikiLoader {
    /// Loads every `doc` / `docx` document reachable in the configured wiki space.
    ///
    /// Fetches a bearer token from the token cache, walks the space's node
    /// tree from the root to gather document tokens, then hands those tokens
    /// to a `LarkDocLoader` built from the stored configuration.
    ///
    /// # Errors
    ///
    /// Returns [`SynapticError::Config`] when no space id has been set, and
    /// propagates any error from token retrieval, node traversal or the
    /// document loader.
    async fn load(&self) -> Result<Vec<Document>, SynapticError> {
        // Guard clause: nothing can be listed without a target space.
        if self.space_id.is_none() {
            return Err(SynapticError::Config(
                "LarkWikiLoader: space_id not set".to_string(),
            ));
        }

        let access_token = self.token_cache.get_token().await?;
        let discovered = self.collect_tokens(&access_token, None, 0).await?;

        crate::loaders::doc::LarkDocLoader::new(self.config_snapshot.clone())
            .with_doc_tokens(discovered)
            .load()
            .await
    }
}