Skip to main content

synaptic_lark/loaders/
bitable.rs

1use crate::{api::bitable::BitableApi, LarkConfig};
2use async_trait::async_trait;
3use serde_json::Value;
4use std::collections::HashMap;
5use synaptic_core::{Document, Loader, SynapticError};
6
7/// Load Feishu/Lark Bitable records into Synaptic [`Document`]s for RAG pipelines.
8///
9/// Each Bitable record becomes one `Document`. The `content` field is populated from
10/// the field named by [`with_content_field`], or the first string-typed field when
11/// no explicit field is given. All other fields are stored in `metadata`.
12///
13/// # Example
14///
15/// ```rust,no_run
16/// use synaptic_lark::{LarkConfig, LarkBitableLoader};
17/// use synaptic_core::Loader;
18///
19/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
20/// let config = LarkConfig::new("cli_xxx", "secret_xxx");
21/// let loader = LarkBitableLoader::new(config)
22///     .with_app("bascnAbcXxx")
23///     .with_table("tblXxx")
24///     .with_content_field("Description");
25///
26/// let docs = loader.load().await?;
27/// for doc in &docs {
28///     println!("Record: {}", doc.id);
29///     println!("Content: {}", doc.content);
30/// }
31/// # Ok(())
32/// # }
33/// ```
34pub struct LarkBitableLoader {
35    api: BitableApi,
36    app_token: Option<String>,
37    table_id: Option<String>,
38    view_id: Option<String>,
39    /// Name of the field whose value becomes `Document.content`.
40    /// When `None`, the first text-type field is used.
41    content_field: Option<String>,
42}
43
44impl LarkBitableLoader {
45    /// Create a new loader using the given config.
46    pub fn new(config: LarkConfig) -> Self {
47        Self {
48            api: BitableApi::new(config),
49            app_token: None,
50            table_id: None,
51            view_id: None,
52            content_field: None,
53        }
54    }
55
56    /// Set the Bitable app token (e.g. `"bascnAbcXxx"`).
57    pub fn with_app(mut self, app_token: impl Into<String>) -> Self {
58        self.app_token = Some(app_token.into());
59        self
60    }
61
62    /// Set the table ID within the Bitable app (e.g. `"tblXxx"`).
63    pub fn with_table(mut self, table_id: impl Into<String>) -> Self {
64        self.table_id = Some(table_id.into());
65        self
66    }
67
68    /// Optionally filter records by a specific view (e.g. `"vewXxx"`).
69    pub fn with_view(mut self, view_id: impl Into<String>) -> Self {
70        self.view_id = Some(view_id.into());
71        self
72    }
73
74    /// Specify which field's value becomes the `Document.content`.
75    ///
76    /// When not set, the first string-typed field is used automatically.
77    pub fn with_content_field(mut self, field: impl Into<String>) -> Self {
78        self.content_field = Some(field.into());
79        self
80    }
81
82    // ── Accessors (used in tests) ────────────────────────────────────────────
83
84    /// Returns the configured app token, or `""` if not set.
85    pub fn app_token(&self) -> &str {
86        self.app_token.as_deref().unwrap_or("")
87    }
88
89    /// Returns the configured table ID, or `""` if not set.
90    pub fn table_id(&self) -> &str {
91        self.table_id.as_deref().unwrap_or("")
92    }
93
94    /// Returns the configured view ID if any.
95    pub fn view_id(&self) -> Option<&str> {
96        self.view_id.as_deref()
97    }
98
99    /// Returns the configured content field name if any.
100    pub fn content_field(&self) -> Option<&str> {
101        self.content_field.as_deref()
102    }
103
104    // ── Private helpers ──────────────────────────────────────────────────────
105
106    /// Convert a single Bitable record JSON object into a [`Document`].
107    fn record_to_document(&self, record: &Value) -> Document {
108        let record_id = record["record_id"].as_str().unwrap_or("").to_string();
109        let fields = record["fields"].as_object();
110
111        let mut metadata: HashMap<String, Value> = HashMap::new();
112        metadata.insert("record_id".to_string(), Value::String(record_id.clone()));
113        metadata.insert(
114            "source".to_string(),
115            Value::String("lark_bitable".to_string()),
116        );
117
118        let mut content = String::new();
119
120        if let Some(fields_map) = fields {
121            for (k, v) in fields_map {
122                if let Some(ref cf) = self.content_field {
123                    if k == cf {
124                        content = value_to_text(v);
125                    } else {
126                        metadata.insert(k.clone(), v.clone());
127                    }
128                } else {
129                    // Auto mode: use the first string-ish field as content.
130                    if content.is_empty() {
131                        if let Some(s) = v.as_str() {
132                            content = s.to_string();
133                        } else if v.is_array() || v.is_object() {
134                            metadata.insert(k.clone(), v.clone());
135                        } else {
136                            content = v.to_string();
137                        }
138                    } else {
139                        metadata.insert(k.clone(), v.clone());
140                    }
141                }
142            }
143        }
144
145        Document {
146            id: record_id,
147            content,
148            metadata,
149        }
150    }
151}
152
153/// Convert a Bitable field value to a plain string.
154///
155/// Rich-text arrays (used for text/multi-line fields) are joined by extracting
156/// each item's `"text"` property. Other scalar values fall back to `Value::to_string()`.
157fn value_to_text(v: &Value) -> String {
158    match v {
159        Value::String(s) => s.clone(),
160        Value::Array(arr) => arr
161            .iter()
162            .filter_map(|item| item["text"].as_str())
163            .collect::<Vec<_>>()
164            .join(""),
165        _ => v.to_string(),
166    }
167}
168
169#[async_trait]
170impl Loader for LarkBitableLoader {
171    async fn load(&self) -> Result<Vec<Document>, SynapticError> {
172        let app_token = self.app_token.as_deref().ok_or_else(|| {
173            SynapticError::Config("LarkBitableLoader: app_token not set".to_string())
174        })?;
175        let table_id = self.table_id.as_deref().ok_or_else(|| {
176            SynapticError::Config("LarkBitableLoader: table_id not set".to_string())
177        })?;
178
179        let mut docs = Vec::new();
180        let mut page_token: Option<String> = None;
181
182        loop {
183            let (items, next) = self
184                .api
185                .list_records_page(
186                    app_token,
187                    table_id,
188                    self.view_id.as_deref(),
189                    page_token.as_deref(),
190                )
191                .await?;
192            for record in &items {
193                docs.push(self.record_to_document(record));
194            }
195            match next {
196                Some(pt) => page_token = Some(pt),
197                None => break,
198            }
199        }
200
201        Ok(docs)
202    }
203}