synaptic_lark/loaders/
spreadsheet.rs1use crate::{auth::TokenCache, LarkConfig};
2use async_trait::async_trait;
3use serde_json::Value;
4use std::collections::HashMap;
5use synaptic_core::{Document, Loader, SynapticError};
6
7pub struct LarkSpreadsheetLoader {
30 token_cache: TokenCache,
31 base_url: String,
32 client: reqwest::Client,
33 spreadsheet_token: Option<String>,
34 sheet_id: String,
35 content_col: usize,
36 header_row: bool,
37}
38
39impl LarkSpreadsheetLoader {
40 pub fn new(config: LarkConfig) -> Self {
42 let base_url = config.base_url.clone();
43 Self {
44 token_cache: config.token_cache(),
45 base_url,
46 client: reqwest::Client::new(),
47 spreadsheet_token: None,
48 sheet_id: "0".to_string(),
49 content_col: 0,
50 header_row: true,
51 }
52 }
53
54 pub fn with_token(mut self, t: impl Into<String>) -> Self {
56 self.spreadsheet_token = Some(t.into());
57 self
58 }
59
60 pub fn with_sheet(mut self, id: impl Into<String>) -> Self {
62 self.sheet_id = id.into();
63 self
64 }
65
66 pub fn with_content_col(mut self, col: usize) -> Self {
68 self.content_col = col;
69 self
70 }
71
72 pub fn with_header_row(mut self, v: bool) -> Self {
74 self.header_row = v;
75 self
76 }
77
78 pub fn spreadsheet_token(&self) -> &str {
80 self.spreadsheet_token.as_deref().unwrap_or("")
81 }
82
83 pub fn sheet_id(&self) -> &str {
85 &self.sheet_id
86 }
87}
88
89#[async_trait]
90impl Loader for LarkSpreadsheetLoader {
91 async fn load(&self) -> Result<Vec<Document>, SynapticError> {
92 let stoken = self.spreadsheet_token.as_deref().ok_or_else(|| {
93 SynapticError::Config("LarkSpreadsheetLoader: spreadsheet_token not set".to_string())
94 })?;
95 let token = self.token_cache.get_token().await?;
96
97 let range = format!("{}!A1:ZZ10000", self.sheet_id);
98 let url = format!(
99 "{}/sheets/v2/spreadsheets/{}/values/{}?renderType=PlainText",
100 self.base_url,
101 stoken,
102 urlencoding::encode(&range)
103 );
104 let resp = self
105 .client
106 .get(&url)
107 .bearer_auth(&token)
108 .send()
109 .await
110 .map_err(|e| SynapticError::Loader(format!("spreadsheet fetch: {e}")))?;
111 let body: Value = resp
112 .json()
113 .await
114 .map_err(|e| SynapticError::Loader(format!("spreadsheet parse: {e}")))?;
115 if body["code"].as_i64().unwrap_or(-1) != 0 {
116 return Err(SynapticError::Loader(format!(
117 "Lark Spreadsheet API error: {}",
118 body["msg"].as_str().unwrap_or("unknown")
119 )));
120 }
121
122 let rows = body["data"]["valueRange"]["values"]
123 .as_array()
124 .ok_or_else(|| SynapticError::Loader("no values in spreadsheet".to_string()))?;
125
126 let empty_row: Vec<Value> = Vec::new();
127 let (headers, data_rows) = if self.header_row && !rows.is_empty() {
128 let hdrs: Vec<String> = rows[0]
129 .as_array()
130 .unwrap_or(&empty_row)
131 .iter()
132 .map(|v| v.as_str().unwrap_or("").to_string())
133 .collect();
134 (hdrs, &rows[1..])
135 } else {
136 (vec![], rows.as_slice())
137 };
138
139 let empty: Vec<Value> = Vec::new();
140 let mut docs = Vec::new();
141 for (i, row) in data_rows.iter().enumerate() {
142 let cells = row.as_array().unwrap_or(&empty);
143 let content = cells
144 .get(self.content_col)
145 .and_then(|v| v.as_str())
146 .unwrap_or("")
147 .to_string();
148
149 let mut metadata = HashMap::new();
150 metadata.insert(
151 "row_index".to_string(),
152 Value::from(i + if self.header_row { 2 } else { 1 }),
153 );
154 metadata.insert(
155 "source".to_string(),
156 Value::String("lark_spreadsheet".to_string()),
157 );
158 for (j, cell) in cells.iter().enumerate() {
159 if j == self.content_col {
160 continue;
161 }
162 let col_name = headers
163 .get(j)
164 .cloned()
165 .unwrap_or_else(|| format!("col_{j}"));
166 metadata.insert(col_name, cell.clone());
167 }
168
169 docs.push(Document {
170 id: format!("{}_{}", stoken, i),
171 content,
172 metadata,
173 });
174 }
175 Ok(docs)
176 }
177}