Skip to main content

synaptic_lark/loaders/
spreadsheet.rs

1use crate::{auth::TokenCache, LarkConfig};
2use async_trait::async_trait;
3use serde_json::Value;
4use std::collections::HashMap;
5use synaptic_core::{Document, Loader, SynapticError};
6
7/// Load rows from a Feishu/Lark spreadsheet as Synaptic [`Document`]s.
8///
9/// Each non-header row becomes one document. The column designated by
10/// [`with_content_col`] supplies the document `content`; all other columns
11/// are stored in the document `metadata`.
12///
13/// # Example
14///
15/// ```rust,no_run
16/// use synaptic_lark::{LarkConfig, LarkSpreadsheetLoader};
17/// use synaptic_core::Loader;
18///
19/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
20/// let loader = LarkSpreadsheetLoader::new(LarkConfig::new("cli_xxx", "secret"))
21///     .with_token("shtcnXxx")
22///     .with_sheet("0")
23///     .with_content_col(0)
24///     .with_header_row(true);
25/// let docs = loader.load().await?;
26/// # Ok(())
27/// # }
28/// ```
29pub struct LarkSpreadsheetLoader {
30    token_cache: TokenCache,
31    base_url: String,
32    client: reqwest::Client,
33    spreadsheet_token: Option<String>,
34    sheet_id: String,
35    content_col: usize,
36    header_row: bool,
37}
38
39impl LarkSpreadsheetLoader {
40    /// Create a new loader using the given config.
41    pub fn new(config: LarkConfig) -> Self {
42        let base_url = config.base_url.clone();
43        Self {
44            token_cache: config.token_cache(),
45            base_url,
46            client: reqwest::Client::new(),
47            spreadsheet_token: None,
48            sheet_id: "0".to_string(),
49            content_col: 0,
50            header_row: true,
51        }
52    }
53
54    /// Set the spreadsheet token (e.g. `"shtcnXxx"`).
55    pub fn with_token(mut self, t: impl Into<String>) -> Self {
56        self.spreadsheet_token = Some(t.into());
57        self
58    }
59
60    /// Set the sheet ID within the spreadsheet (default `"0"`).
61    pub fn with_sheet(mut self, id: impl Into<String>) -> Self {
62        self.sheet_id = id.into();
63        self
64    }
65
66    /// Set which column (0-indexed) to use as document `content` (default `0`).
67    pub fn with_content_col(mut self, col: usize) -> Self {
68        self.content_col = col;
69        self
70    }
71
72    /// Whether the first row is a header row (default `true`).
73    pub fn with_header_row(mut self, v: bool) -> Self {
74        self.header_row = v;
75        self
76    }
77
78    /// Return the spreadsheet token (empty string if not set).
79    pub fn spreadsheet_token(&self) -> &str {
80        self.spreadsheet_token.as_deref().unwrap_or("")
81    }
82
83    /// Return the sheet ID.
84    pub fn sheet_id(&self) -> &str {
85        &self.sheet_id
86    }
87}
88
89#[async_trait]
90impl Loader for LarkSpreadsheetLoader {
91    async fn load(&self) -> Result<Vec<Document>, SynapticError> {
92        let stoken = self.spreadsheet_token.as_deref().ok_or_else(|| {
93            SynapticError::Config("LarkSpreadsheetLoader: spreadsheet_token not set".to_string())
94        })?;
95        let token = self.token_cache.get_token().await?;
96
97        let range = format!("{}!A1:ZZ10000", self.sheet_id);
98        let url = format!(
99            "{}/sheets/v2/spreadsheets/{}/values/{}?renderType=PlainText",
100            self.base_url,
101            stoken,
102            urlencoding::encode(&range)
103        );
104        let resp = self
105            .client
106            .get(&url)
107            .bearer_auth(&token)
108            .send()
109            .await
110            .map_err(|e| SynapticError::Loader(format!("spreadsheet fetch: {e}")))?;
111        let body: Value = resp
112            .json()
113            .await
114            .map_err(|e| SynapticError::Loader(format!("spreadsheet parse: {e}")))?;
115        if body["code"].as_i64().unwrap_or(-1) != 0 {
116            return Err(SynapticError::Loader(format!(
117                "Lark Spreadsheet API error: {}",
118                body["msg"].as_str().unwrap_or("unknown")
119            )));
120        }
121
122        let rows = body["data"]["valueRange"]["values"]
123            .as_array()
124            .ok_or_else(|| SynapticError::Loader("no values in spreadsheet".to_string()))?;
125
126        let empty_row: Vec<Value> = Vec::new();
127        let (headers, data_rows) = if self.header_row && !rows.is_empty() {
128            let hdrs: Vec<String> = rows[0]
129                .as_array()
130                .unwrap_or(&empty_row)
131                .iter()
132                .map(|v| v.as_str().unwrap_or("").to_string())
133                .collect();
134            (hdrs, &rows[1..])
135        } else {
136            (vec![], rows.as_slice())
137        };
138
139        let empty: Vec<Value> = Vec::new();
140        let mut docs = Vec::new();
141        for (i, row) in data_rows.iter().enumerate() {
142            let cells = row.as_array().unwrap_or(&empty);
143            let content = cells
144                .get(self.content_col)
145                .and_then(|v| v.as_str())
146                .unwrap_or("")
147                .to_string();
148
149            let mut metadata = HashMap::new();
150            metadata.insert(
151                "row_index".to_string(),
152                Value::from(i + if self.header_row { 2 } else { 1 }),
153            );
154            metadata.insert(
155                "source".to_string(),
156                Value::String("lark_spreadsheet".to_string()),
157            );
158            for (j, cell) in cells.iter().enumerate() {
159                if j == self.content_col {
160                    continue;
161                }
162                let col_name = headers
163                    .get(j)
164                    .cloned()
165                    .unwrap_or_else(|| format!("col_{j}"));
166                metadata.insert(col_name, cell.clone());
167            }
168
169            docs.push(Document {
170                id: format!("{}_{}", stoken, i),
171                content,
172                metadata,
173            });
174        }
175        Ok(docs)
176    }
177}