Skip to main content

smooth_operator/
connector_config.rs

//! Connector configuration storage (Phase 12, increment 3).
//!
//! The management console configures **connectors** — a `github`/`web`/`file`
//! source the indexing loop pulls documents from — through the admin write API.
//! A [`ConnectorConfig`] is the persisted, org-scoped description of one such
//! source. The admin API creates, reads, updates, and deletes these configs and,
//! on demand, builds a live `smooth_operator_ingestion::Connector` from one to
//! trigger an indexing run.
//!
//! ## The `auth_ref` secret model — never store the secret
//!
//! A connector's `config` payload (a free-form [`serde_json::Value`]) may carry
//! an **`auth_ref`** — the *name* of an environment variable / secret (e.g.
//! `"GITHUB_TOKEN"`), **never the token itself**. The actual credential is
//! resolved from the environment (or `@smooai/config` when deployed) at *index
//! time*; it is never persisted in the store and never returned in an API
//! response. This keeps the config store free of secret material: a leaked
//! store row, log line, or API response exposes only a *reference*, not a
//! credential.
//!
//! ## Persistence
//!
//! This module ships with an [`InMemoryConnectorConfigStore`]. The persistent
//! follow-up is a Postgres/DynamoDB `connector_configs` table keyed on
//! `(org_id, id)` — [`upsert`](ConnectorConfigStore::upsert) maps to an
//! `INSERT … ON CONFLICT`, `list` to `SELECT … WHERE org_id = $1`, and `delete`
//! to a scoped `DELETE`. Only the trait and the in-memory impl are built here;
//! the persistent adapters follow as siblings of the existing
//! conversation/checkpoint adapters (see `docs/ADMIN-API.md`).
27
use std::collections::HashMap;
use std::sync::{PoisonError, RwLock};

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
34
35/// The kind of source a [`ConnectorConfig`] describes. Mirrors the built-in
36/// `smooth_operator_ingestion` connectors (`github` / `web` / `file`); an
37/// unknown wire value is rejected at the API boundary with a clean 400.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
39#[serde(rename_all = "lowercase")]
40pub enum ConnectorKind {
41    /// A GitHub repository (prose/code/issues) — `GithubConnector`.
42    Github,
43    /// A single public web URL — `WebConnector`.
44    Web,
45    /// A local file tree — `FileConnector`.
46    File,
47}
48
49impl ConnectorKind {
50    /// Parse a kind from a wire string (case-insensitive).
51    ///
52    /// # Errors
53    /// Returns `Err(value)` (the offending input) when not a known kind, so the
54    /// caller can build a precise 400 message.
55    pub fn parse(value: &str) -> Result<Self, String> {
56        match value.trim().to_ascii_lowercase().as_str() {
57            "github" => Ok(Self::Github),
58            "web" => Ok(Self::Web),
59            "file" => Ok(Self::File),
60            other => Err(other.to_string()),
61        }
62    }
63
64    /// The wire/string form of this kind.
65    #[must_use]
66    pub fn as_str(self) -> &'static str {
67        match self {
68            Self::Github => "github",
69            Self::Web => "web",
70            Self::File => "file",
71        }
72    }
73}
74
75/// A persisted, org-scoped connector configuration.
76///
77/// The `config` payload is connector-kind-specific and free-form so a new
78/// connector kind needs no schema migration. For `github` it carries
79/// `owner` / `repo` and optionally `ref` / `include` / `visibility`; for `web`
80/// a `url`; for `file` a `path`. The optional **`auth_ref`** names the secret to
81/// resolve at index time — the secret value is **never** stored here.
82#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
83#[serde(rename_all = "camelCase")]
84pub struct ConnectorConfig {
85    /// Stable id (uuid v4), unique within the org.
86    pub id: String,
87    /// The owning organization — every store operation is scoped to this.
88    pub org_id: String,
89    /// Human-readable name for the connector.
90    pub name: String,
91    /// The source kind.
92    pub kind: ConnectorKind,
93    /// Kind-specific configuration (owner/repo/url/path/include/ref/…). May carry
94    /// an `auth_ref` naming a secret; never the secret itself.
95    pub config: Value,
96    /// Whether the connector is active (a disabled connector is configured but
97    /// won't be auto-indexed by a scheduler; manual `/index` still works).
98    pub enabled: bool,
99    /// When the config row was created.
100    pub created_at: DateTime<Utc>,
101    /// When the config row was last updated.
102    pub updated_at: DateTime<Utc>,
103}
104
105impl ConnectorConfig {
106    /// The `auth_ref` (secret name) from the `config` payload, if present and a
107    /// non-empty string. This is the *name* to resolve from env/config at index
108    /// time — never a token value.
109    #[must_use]
110    pub fn auth_ref(&self) -> Option<&str> {
111        self.config
112            .get("auth_ref")
113            .and_then(Value::as_str)
114            .map(str::trim)
115            .filter(|s| !s.is_empty())
116    }
117}
118
119/// Storage seam for [`ConnectorConfig`]s. Every method is org-scoped so a
120/// caller can only ever see / mutate its own org's connectors.
121pub trait ConnectorConfigStore: Send + Sync {
122    /// All connector configs for `org_id`, sorted by `name` (stable).
123    fn list(&self, org_id: &str) -> Vec<ConnectorConfig>;
124
125    /// One connector config by `(org_id, id)`, or `None` if absent / in another
126    /// org (cross-org reads return `None`, never another org's row).
127    fn get(&self, org_id: &str, id: &str) -> Option<ConnectorConfig>;
128
129    /// Insert or update a connector config (keyed on `(org_id, id)`).
130    fn upsert(&self, config: ConnectorConfig);
131
132    /// Delete a connector config by `(org_id, id)`. Returns whether a row was
133    /// removed (so the API can 404 a delete of an absent / cross-org id).
134    fn delete(&self, org_id: &str, id: &str) -> bool;
135}
136
137/// In-memory [`ConnectorConfigStore`] keyed on `(org_id, id)`.
138#[derive(Default)]
139pub struct InMemoryConnectorConfigStore {
140    /// `(org_id, id)` → config.
141    rows: RwLock<HashMap<(String, String), ConnectorConfig>>,
142}
143
144impl InMemoryConnectorConfigStore {
145    /// A fresh, empty store.
146    #[must_use]
147    pub fn new() -> Self {
148        Self::default()
149    }
150}
151
152impl ConnectorConfigStore for InMemoryConnectorConfigStore {
153    fn list(&self, org_id: &str) -> Vec<ConnectorConfig> {
154        let Ok(rows) = self.rows.read() else {
155            return Vec::new();
156        };
157        let mut out: Vec<ConnectorConfig> = rows
158            .values()
159            .filter(|c| c.org_id == org_id)
160            .cloned()
161            .collect();
162        out.sort_by(|a, b| a.name.cmp(&b.name).then_with(|| a.id.cmp(&b.id)));
163        out
164    }
165
166    fn get(&self, org_id: &str, id: &str) -> Option<ConnectorConfig> {
167        let rows = self.rows.read().ok()?;
168        rows.get(&(org_id.to_string(), id.to_string())).cloned()
169    }
170
171    fn upsert(&self, config: ConnectorConfig) {
172        if let Ok(mut rows) = self.rows.write() {
173            rows.insert((config.org_id.clone(), config.id.clone()), config);
174        }
175    }
176
177    fn delete(&self, org_id: &str, id: &str) -> bool {
178        if let Ok(mut rows) = self.rows.write() {
179            rows.remove(&(org_id.to_string(), id.to_string())).is_some()
180        } else {
181            false
182        }
183    }
184}
185
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Build an enabled config whose `created_at` / `updated_at` are both the
    /// current time.
    fn cfg(org: &str, id: &str, name: &str, kind: ConnectorKind, config: Value) -> ConnectorConfig {
        let now = Utc::now();
        ConnectorConfig {
            id: id.into(),
            org_id: org.into(),
            name: name.into(),
            kind,
            config,
            enabled: true,
            created_at: now,
            updated_at: now,
        }
    }

    /// Shorthand for a `web` connector pointing at `url`.
    fn web(org: &str, id: &str, name: &str, url: &str) -> ConnectorConfig {
        cfg(org, id, name, ConnectorKind::Web, json!({ "url": url }))
    }

    /// Shorthand for a `github` connector with the given `config` payload.
    fn github(id: &str, config: Value) -> ConnectorConfig {
        cfg("o", id, "n", ConnectorKind::Github, config)
    }

    #[test]
    fn kind_parse_roundtrips_and_rejects_unknown() {
        assert_eq!(
            ConnectorKind::parse("github").unwrap(),
            ConnectorKind::Github
        );
        assert_eq!(ConnectorKind::parse("  WEB ").unwrap(), ConnectorKind::Web);
        assert_eq!(ConnectorKind::parse("File").unwrap(), ConnectorKind::File);
        assert_eq!(ConnectorKind::Github.as_str(), "github");
        assert_eq!(ConnectorKind::parse("slack").unwrap_err(), "slack");

        // Every variant must survive `as_str` → `parse`, not just `Github`.
        for kind in [ConnectorKind::Github, ConnectorKind::Web, ConnectorKind::File] {
            assert_eq!(ConnectorKind::parse(kind.as_str()), Ok(kind));
        }
        // The rejected value is reported trimmed and lowercased.
        assert_eq!(ConnectorKind::parse("  SLACK ").unwrap_err(), "slack");
    }

    #[test]
    fn kind_uses_lowercase_wire_form() {
        assert_eq!(
            serde_json::to_value(ConnectorKind::Github).unwrap(),
            json!("github")
        );
        assert_eq!(
            serde_json::from_value::<ConnectorKind>(json!("file")).unwrap(),
            ConnectorKind::File
        );
        assert!(serde_json::from_value::<ConnectorKind>(json!("slack")).is_err());
    }

    #[test]
    fn upsert_list_get_are_org_scoped() {
        let store = InMemoryConnectorConfigStore::new();
        store.upsert(web("org-a", "1", "beta", "https://b"));
        store.upsert(web("org-a", "2", "alpha", "https://a"));
        store.upsert(web("org-b", "3", "gamma", "https://g"));

        // org-a sees only its two, sorted by name.
        let a = store.list("org-a");
        assert_eq!(a.len(), 2);
        assert_eq!(a[0].name, "alpha");
        assert_eq!(a[1].name, "beta");

        // org-b sees only its one.
        assert_eq!(store.list("org-b").len(), 1);

        // Cross-org get returns None (org-b can't read org-a's id "1").
        assert!(store.get("org-b", "1").is_none());
        assert!(store.get("org-a", "1").is_some());
    }

    #[test]
    fn list_breaks_name_ties_by_id() {
        let store = InMemoryConnectorConfigStore::new();
        store.upsert(web("o", "b", "same", "https://b"));
        store.upsert(web("o", "a", "same", "https://a"));
        store.upsert(web("o", "c", "same", "https://c"));

        let ids: Vec<String> = store.list("o").into_iter().map(|c| c.id).collect();
        assert_eq!(ids, ["a", "b", "c"]);
    }

    #[test]
    fn upsert_updates_in_place() {
        let store = InMemoryConnectorConfigStore::new();
        store.upsert(web("o", "1", "name-1", "https://1"));
        store.upsert(web("o", "1", "name-2", "https://2"));
        let got = store.get("o", "1").unwrap();
        assert_eq!(got.name, "name-2");
        assert_eq!(store.list("o").len(), 1, "upsert replaces, not appends");
    }

    #[test]
    fn delete_is_org_scoped_and_reports_removal() {
        let store = InMemoryConnectorConfigStore::new();
        store.upsert(cfg(
            "o",
            "1",
            "n",
            ConnectorKind::File,
            json!({"path": "/d"}),
        ));
        // Cross-org delete is a no-op.
        assert!(!store.delete("other", "1"));
        assert!(store.get("o", "1").is_some());
        // Scoped delete removes + reports true; a second delete reports false.
        assert!(store.delete("o", "1"));
        assert!(!store.delete("o", "1"));
        assert!(store.get("o", "1").is_none());
    }

    #[test]
    fn auth_ref_reads_secret_name_not_value() {
        let with = github(
            "1",
            json!({"owner": "o", "repo": "r", "auth_ref": "GITHUB_TOKEN"}),
        );
        assert_eq!(with.auth_ref(), Some("GITHUB_TOKEN"));

        // Surrounding whitespace is not part of the secret name.
        let padded = github("4", json!({"auth_ref": "  GITHUB_TOKEN  "}));
        assert_eq!(padded.auth_ref(), Some("GITHUB_TOKEN"));

        // Absent / blank / non-string auth_ref ⇒ None.
        let without = github("2", json!({"owner": "o", "repo": "r"}));
        assert_eq!(without.auth_ref(), None);
        let blank = github("3", json!({"auth_ref": "  "}));
        assert_eq!(blank.auth_ref(), None);
        let numeric = github("5", json!({"auth_ref": 42}));
        assert_eq!(numeric.auth_ref(), None);
    }
}