Skip to main content

synaptic_lark/loaders/
drive.rs

1use async_trait::async_trait;
2use serde_json::Value;
3use synaptic_core::{Document, Loader, SynapticError};
4
5use crate::{
6    auth::TokenCache, loaders::doc::LarkDocLoader, loaders::spreadsheet::LarkSpreadsheetLoader,
7    LarkConfig,
8};
9
10/// Load documents from a Feishu/Lark Drive folder as Synaptic [`Document`]s.
11///
12/// Iterates over files in the folder and loads `doc`/`docx` files via
13/// [`LarkDocLoader`] and `sheet` files via [`LarkSpreadsheetLoader`].
14/// Use [`recursive`] to also traverse sub-folders.
15///
16/// # Example
17///
18/// ```rust,no_run
19/// use synaptic_lark::{LarkConfig, LarkDriveLoader};
20/// use synaptic_core::Loader;
21///
22/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
23/// let loader = LarkDriveLoader::new(LarkConfig::new("cli_xxx", "secret"))
24///     .with_folder_token("fldcnXxx")
25///     .recursive();
26/// let docs = loader.load().await?;
27/// # Ok(())
28/// # }
29/// ```
30pub struct LarkDriveLoader {
31    token_cache: TokenCache,
32    base_url: String,
33    config_snapshot: LarkConfig,
34    client: reqwest::Client,
35    folder_token: Option<String>,
36    recursive: bool,
37}
38
39impl LarkDriveLoader {
40    /// Create a new loader using the given config.
41    pub fn new(config: LarkConfig) -> Self {
42        let base_url = config.base_url.clone();
43        Self {
44            token_cache: config.clone().token_cache(),
45            base_url,
46            config_snapshot: config,
47            client: reqwest::Client::new(),
48            folder_token: None,
49            recursive: false,
50        }
51    }
52
53    /// Set the folder token to list files from.
54    pub fn with_folder_token(mut self, t: impl Into<String>) -> Self {
55        self.folder_token = Some(t.into());
56        self
57    }
58
59    /// Enable recursive traversal into sub-folders.
60    pub fn recursive(mut self) -> Self {
61        self.recursive = true;
62        self
63    }
64
65    /// Return the folder token (empty string if not set).
66    pub fn folder_token(&self) -> &str {
67        self.folder_token.as_deref().unwrap_or("")
68    }
69}
70
71#[async_trait]
72impl Loader for LarkDriveLoader {
73    async fn load(&self) -> Result<Vec<Document>, SynapticError> {
74        let folder = self.folder_token.as_deref().ok_or_else(|| {
75            SynapticError::Config("LarkDriveLoader: folder_token not set".to_string())
76        })?;
77        let token = self.token_cache.get_token().await?;
78        let url = format!(
79            "{}/drive/v1/files?folder_token={}&page_size=200",
80            self.base_url, folder
81        );
82        let resp = self
83            .client
84            .get(&url)
85            .bearer_auth(&token)
86            .send()
87            .await
88            .map_err(|e| SynapticError::Loader(format!("drive list: {e}")))?;
89        let body: Value = resp
90            .json()
91            .await
92            .map_err(|e| SynapticError::Loader(format!("drive list parse: {e}")))?;
93        if body["code"].as_i64().unwrap_or(-1) != 0 {
94            return Err(SynapticError::Loader(format!(
95                "Lark Drive API error: {}",
96                body["msg"].as_str().unwrap_or("unknown")
97            )));
98        }
99
100        let mut docs = Vec::new();
101        let items = body["data"]["files"]
102            .as_array()
103            .cloned()
104            .unwrap_or_default();
105        for item in &items {
106            let file_type = item["type"].as_str().unwrap_or("");
107            let token_val = item["token"].as_str().unwrap_or("");
108            match file_type {
109                "doc" | "docx" => {
110                    let loader = LarkDocLoader::new(self.config_snapshot.clone())
111                        .with_doc_tokens(vec![token_val.to_string()]);
112                    match loader.load().await {
113                        Ok(d) => docs.extend(d),
114                        Err(e) => tracing::warn!("drive: skip doc {token_val}: {e}"),
115                    }
116                }
117                "sheet" => {
118                    let loader = LarkSpreadsheetLoader::new(self.config_snapshot.clone())
119                        .with_token(token_val);
120                    match loader.load().await {
121                        Ok(d) => docs.extend(d),
122                        Err(e) => tracing::warn!("drive: skip sheet {token_val}: {e}"),
123                    }
124                }
125                "folder" if self.recursive => {
126                    let sub = LarkDriveLoader::new(self.config_snapshot.clone())
127                        .with_folder_token(token_val)
128                        .recursive();
129                    match sub.load().await {
130                        Ok(d) => docs.extend(d),
131                        Err(e) => tracing::warn!("drive: skip subfolder {token_val}: {e}"),
132                    }
133                }
134                _ => {}
135            }
136        }
137        Ok(docs)
138    }
139}