synaptic_lark/loaders/
doc.rs1use async_trait::async_trait;
2use serde_json::Value;
3use std::collections::HashMap;
4use synaptic_core::{Document, Loader, SynapticError};
5
6use crate::{auth::TokenCache, LarkConfig};
7
8pub struct LarkDocLoader {
34 token_cache: TokenCache,
35 base_url: String,
36 doc_tokens: Vec<String>,
37 wiki_space_id: Option<String>,
38 client: reqwest::Client,
39}
40
41impl LarkDocLoader {
42 pub fn new(config: LarkConfig) -> Self {
44 let base_url = config.base_url.clone();
45 Self {
46 token_cache: config.token_cache(),
47 base_url,
48 doc_tokens: vec![],
49 wiki_space_id: None,
50 client: reqwest::Client::new(),
51 }
52 }
53
54 pub fn with_doc_tokens(mut self, tokens: Vec<String>) -> Self {
56 self.doc_tokens = tokens;
57 self
58 }
59
60 pub fn with_wiki_space_id(mut self, space_id: impl Into<String>) -> Self {
62 self.wiki_space_id = Some(space_id.into());
63 self
64 }
65
66 async fn auth_header(&self) -> Result<String, SynapticError> {
67 let token = self.token_cache.get_token().await?;
68 Ok(format!("Bearer {token}"))
69 }
70
71 async fn fetch_doc_content(&self, doc_token: &str) -> Result<Document, SynapticError> {
73 let auth = self.auth_header().await?;
74 let url = format!(
75 "{}/docx/v1/documents/{}/raw_content",
76 self.base_url, doc_token
77 );
78 let resp = self
79 .client
80 .get(&url)
81 .header("Authorization", auth)
82 .send()
83 .await
84 .map_err(|e| SynapticError::Loader(format!("Lark doc fetch: {e}")))?;
85
86 let body: Value = resp
87 .json()
88 .await
89 .map_err(|e| SynapticError::Loader(format!("Lark doc parse: {e}")))?;
90
91 check_lark_code(&body, "fetch doc content")?;
92
93 let content = body["data"]["content"].as_str().unwrap_or("").to_string();
94 let title = body["data"]["title"].as_str().unwrap_or("").to_string();
95
96 let mut metadata = HashMap::new();
97 metadata.insert("doc_id".to_string(), Value::String(doc_token.to_string()));
98 metadata.insert("title".to_string(), Value::String(title));
99 metadata.insert(
100 "source".to_string(),
101 Value::String(format!("lark:doc:{doc_token}")),
102 );
103 metadata.insert(
104 "url".to_string(),
105 Value::String(format!("https://bytedance.feishu.cn/docx/{doc_token}")),
106 );
107 metadata.insert("doc_type".to_string(), Value::String("docx".to_string()));
108
109 Ok(Document {
110 id: doc_token.to_string(),
111 content,
112 metadata,
113 })
114 }
115
116 async fn list_wiki_nodes(&self, space_id: &str) -> Result<Vec<String>, SynapticError> {
118 let auth = self.auth_header().await?;
119 let mut tokens = Vec::new();
120 let mut page_token: Option<String> = None;
121
122 loop {
123 let mut url = format!(
124 "{}/wiki/v2/spaces/{}/nodes?page_size=50",
125 self.base_url, space_id
126 );
127 if let Some(ref pt) = page_token {
128 url.push_str(&format!("&page_token={pt}"));
129 }
130
131 let resp = self
132 .client
133 .get(&url)
134 .header("Authorization", auth.clone())
135 .send()
136 .await
137 .map_err(|e| SynapticError::Loader(format!("Lark wiki list: {e}")))?;
138
139 let body: Value = resp
140 .json()
141 .await
142 .map_err(|e| SynapticError::Loader(format!("Lark wiki parse: {e}")))?;
143
144 check_lark_code(&body, "list wiki nodes")?;
145
146 if let Some(items) = body["data"]["items"].as_array() {
147 for item in items {
148 if let Some(obj_token) = item["obj_token"].as_str() {
149 let obj_type = item["obj_type"].as_str().unwrap_or("");
150 if obj_type == "docx" || obj_type == "doc" {
151 tokens.push(obj_token.to_string());
152 }
153 }
154 }
155 }
156
157 let has_more = body["data"]["has_more"].as_bool().unwrap_or(false);
158 if !has_more {
159 break;
160 }
161 page_token = body["data"]["page_token"].as_str().map(|s| s.to_string());
162 }
163 Ok(tokens)
164 }
165}
166
167fn check_lark_code(body: &Value, ctx: &str) -> Result<(), SynapticError> {
168 let code = body["code"].as_i64().unwrap_or(-1);
169 if code != 0 {
170 return Err(SynapticError::Loader(format!(
171 "Lark API error ({ctx}) code={code}: {}",
172 body["msg"].as_str().unwrap_or("unknown")
173 )));
174 }
175 Ok(())
176}
177
178#[async_trait]
179impl Loader for LarkDocLoader {
180 async fn load(&self) -> Result<Vec<Document>, SynapticError> {
181 let mut all_tokens = self.doc_tokens.clone();
182
183 if let Some(ref space_id) = self.wiki_space_id {
184 let wiki_tokens = self.list_wiki_nodes(space_id).await?;
185 all_tokens.extend(wiki_tokens);
186 }
187
188 let mut documents = Vec::new();
189 for token in &all_tokens {
190 match self.fetch_doc_content(token).await {
191 Ok(doc) => documents.push(doc),
192 Err(e) => {
193 tracing::warn!("Failed to load Lark doc {token}: {e}");
194 }
195 }
196 }
197 Ok(documents)
198 }
199}