Skip to main content

synaptic_lark/loaders/
wiki.rs

1use async_trait::async_trait;
2use serde_json::Value;
3use synaptic_core::{Document, Loader, SynapticError};
4
5use crate::{auth::TokenCache, LarkConfig};
6
/// Recursively load documents from a Feishu/Lark Wiki space.
///
/// Traverses the wiki node tree depth-first, collecting every `doc`/`docx`
/// node it finds, and then delegates fetching of the document contents to
/// `LarkDocLoader`. Use [`with_max_depth`](Self::with_max_depth) to limit how
/// deep the traversal goes.
///
/// A space ID must be supplied through [`with_space_id`](Self::with_space_id)
/// before calling `load`; otherwise `load` returns a configuration error.
///
/// # Example
///
/// ```rust,no_run
/// use synaptic_lark::{LarkConfig, LarkWikiLoader};
/// use synaptic_core::Loader;
///
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let loader = LarkWikiLoader::new(LarkConfig::new("cli_xxx", "secret"))
///     .with_space_id("space_xxx")
///     .with_max_depth(3);
/// let docs = loader.load().await?;
/// # Ok(())
/// # }
/// ```
pub struct LarkWikiLoader {
    /// Token cache built from the config; `load` asks it for the access token.
    token_cache: TokenCache,
    /// Base URL copied from the config; prefixed to the wiki endpoint path.
    base_url: String,
    /// HTTP client used for the node-listing requests.
    client: reqwest::Client,
    /// Wiki space to traverse; `None` until `with_space_id` is called.
    space_id: Option<String>,
    /// Optional traversal depth limit; `None` means unlimited.
    max_depth: Option<usize>,
    /// Copy of the original config, handed to the `LarkDocLoader` that
    /// fetches the collected documents.
    config_snapshot: LarkConfig,
}
34
35impl LarkWikiLoader {
36    /// Create a new loader using the given config.
37    pub fn new(config: LarkConfig) -> Self {
38        let base_url = config.base_url.clone();
39        Self {
40            token_cache: config.clone().token_cache(),
41            base_url,
42            client: reqwest::Client::new(),
43            space_id: None,
44            max_depth: None,
45            config_snapshot: config,
46        }
47    }
48
49    /// Set the Wiki space ID to traverse.
50    pub fn with_space_id(mut self, id: impl Into<String>) -> Self {
51        self.space_id = Some(id.into());
52        self
53    }
54
55    /// Limit recursive traversal to `d` levels (default: unlimited).
56    pub fn with_max_depth(mut self, d: usize) -> Self {
57        self.max_depth = Some(d);
58        self
59    }
60
61    /// Return the space ID (empty string if not set).
62    pub fn space_id(&self) -> &str {
63        self.space_id.as_deref().unwrap_or("")
64    }
65
66    /// Return the configured max depth.
67    pub fn max_depth(&self) -> Option<usize> {
68        self.max_depth
69    }
70
71    /// Recursively collect all document/docx obj_tokens under a wiki space node.
72    async fn collect_tokens(
73        &self,
74        token: &str,
75        parent_node_token: Option<&str>,
76        depth: usize,
77    ) -> Result<Vec<String>, SynapticError> {
78        if let Some(max) = self.max_depth {
79            if depth > max {
80                return Ok(vec![]);
81            }
82        }
83        let space_id = self.space_id.as_deref().unwrap();
84        let mut url = format!(
85            "{}/wiki/v2/spaces/{}/nodes?page_size=50",
86            self.base_url, space_id
87        );
88        if let Some(pt) = parent_node_token {
89            url.push_str(&format!("&parent_node_token={pt}"));
90        }
91        let resp = self
92            .client
93            .get(&url)
94            .bearer_auth(token)
95            .send()
96            .await
97            .map_err(|e| SynapticError::Loader(format!("wiki nodes: {e}")))?;
98        let body: Value = resp
99            .json()
100            .await
101            .map_err(|e| SynapticError::Loader(format!("wiki nodes parse: {e}")))?;
102        if body["code"].as_i64().unwrap_or(-1) != 0 {
103            return Err(SynapticError::Loader(format!(
104                "Lark Wiki API error: {}",
105                body["msg"].as_str().unwrap_or("unknown")
106            )));
107        }
108
109        let mut tokens = vec![];
110        let items = body["data"]["items"]
111            .as_array()
112            .cloned()
113            .unwrap_or_default();
114        for item in &items {
115            let obj_type = item["obj_type"].as_str().unwrap_or("");
116            let obj_token = item["obj_token"].as_str().unwrap_or("").to_string();
117            let node_token = item["node_token"].as_str().unwrap_or("");
118            let has_child = item["has_child"].as_bool().unwrap_or(false);
119
120            if obj_type == "doc" || obj_type == "docx" {
121                tokens.push(obj_token);
122            }
123            if has_child {
124                let children =
125                    Box::pin(self.collect_tokens(token, Some(node_token), depth + 1)).await?;
126                tokens.extend(children);
127            }
128        }
129        Ok(tokens)
130    }
131}
132
133#[async_trait]
134impl Loader for LarkWikiLoader {
135    async fn load(&self) -> Result<Vec<Document>, SynapticError> {
136        self.space_id
137            .as_deref()
138            .ok_or_else(|| SynapticError::Config("LarkWikiLoader: space_id not set".to_string()))?;
139        let token = self.token_cache.get_token().await?;
140        let doc_tokens = self.collect_tokens(&token, None, 0).await?;
141
142        let loader = crate::loaders::doc::LarkDocLoader::new(self.config_snapshot.clone())
143            .with_doc_tokens(doc_tokens);
144        loader.load().await
145    }
146}