Skip to main content

stakpak_api/stakpak/knowledge/
mod.rs

1//! Knowledge-store endpoints (`/v1/knowledge/...`).
2//!
3//! Wraps the public knowledge CRUD surface with a local on-disk
4//! revalidation cache. The cache is keyed by `<account>/<path>` and uses
5//! the file body's SHA-256 as the ETag for `If-None-Match`. See [`cache`]
6//! for the on-disk layout.
7
8mod cache;
9
10use super::client::{ApiError, StakpakApiClient};
11use super::models::*;
12use crate::models::GetMyAccountResponse;
13use reqwest::{Response, StatusCode, header};
14use serde::de::DeserializeOwned;
15use std::path::{Component, Path, PathBuf};
16use std::time::{Duration, Instant};
17use tracing::debug;
18
/// Cool-down window applied after a failed `/v1/account` lookup.
///
/// While the window is open the client skips account resolution entirely,
/// so a transient outage does not add a second round-trip to every cached
/// read for the rest of the client's lifetime.
const ACCOUNT_RESOLVE_BACKOFF: Duration = Duration::from_secs(60);
23
24/// Memoized result of resolving the cache account name. Lives on
25/// [`StakpakApiClient`] so the negative-cache survives across calls.
26#[derive(Debug, Clone)]
27pub(super) enum AccountCacheState {
28    /// Not yet attempted.
29    Unknown,
30    /// Successfully resolved; reuse forever.
31    Resolved(String),
32    /// Last attempt failed; don't retry until `until`.
33    Failed { until: Instant },
34}
35
36/// Structured error returned by the knowledge-store APIs.
37#[derive(Debug, Clone)]
38pub enum KnowledgeApiError {
39    /// Resource does not exist (HTTP 404).
40    NotFound { message: String },
41    /// Resource already exists (HTTP 409).
42    Conflict { message: String },
43    /// Caller is not authorized (HTTP 401 / 403).
44    Forbidden { message: String },
45    /// Request was rejected by the server (HTTP 400).
46    BadRequest { message: String },
47    /// Catch-all for any other HTTP error status, plus the raw body.
48    Http { status: StatusCode, message: String },
49    /// Transport / serialization / IO failure (no HTTP status available).
50    Transport { message: String },
51}
52
53impl KnowledgeApiError {
54    pub fn message(&self) -> &str {
55        match self {
56            Self::NotFound { message }
57            | Self::Conflict { message }
58            | Self::Forbidden { message }
59            | Self::BadRequest { message }
60            | Self::Http { message, .. }
61            | Self::Transport { message } => message,
62        }
63    }
64
65    /// Returns the HTTP status if the error came from the server.
66    pub fn status(&self) -> Option<StatusCode> {
67        match self {
68            Self::NotFound { .. } => Some(StatusCode::NOT_FOUND),
69            Self::Conflict { .. } => Some(StatusCode::CONFLICT),
70            Self::Forbidden { .. } => Some(StatusCode::FORBIDDEN),
71            Self::BadRequest { .. } => Some(StatusCode::BAD_REQUEST),
72            Self::Http { status, .. } => Some(*status),
73            Self::Transport { .. } => None,
74        }
75    }
76}
77
78impl std::fmt::Display for KnowledgeApiError {
79    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
80        match self {
81            Self::NotFound { message } => write!(f, "not found: {}", message),
82            Self::Conflict { message } => write!(f, "conflict: {}", message),
83            Self::Forbidden { message } => write!(f, "forbidden: {}", message),
84            Self::BadRequest { message } => write!(f, "bad request: {}", message),
85            Self::Http { status, message } => write!(f, "http {}: {}", status, message),
86            Self::Transport { message } => write!(f, "transport error: {}", message),
87        }
88    }
89}
90
91impl std::error::Error for KnowledgeApiError {}
92
93impl From<reqwest::Error> for KnowledgeApiError {
94    fn from(err: reqwest::Error) -> Self {
95        Self::Transport {
96            message: err.to_string(),
97        }
98    }
99}
100
101/// Percent-encode each segment of a path independently, preserving `/`
102/// separators so the URL still matches Axum's `{*path}` greedy capture
103/// after the server's path extractor decodes it.
104fn encode_path_segments(path: &str) -> String {
105    path.split('/')
106        .map(|seg| urlencoding::encode(seg).into_owned())
107        .collect::<Vec<_>>()
108        .join("/")
109}
110
111/// Normalize and validate a knowledge-store path using the same component
112/// rules as local AK path resolution.
113///
114/// Rejected components:
115/// - `..` parent traversal
116/// - absolute/rooted paths
117/// - platform prefixes (e.g. `C:` on Windows)
118///
119/// Accepted and normalized:
120/// - `.` components are removed
121/// - repeated separators collapse via component iteration
122fn normalize_knowledge_path(path: &str) -> Result<String, KnowledgeApiError> {
123    if path.is_empty() {
124        return Ok(String::new());
125    }
126
127    let mut parts: Vec<String> = Vec::new();
128    for component in Path::new(path).components() {
129        match component {
130            Component::Normal(part) => parts.push(part.to_string_lossy().into_owned()),
131            Component::CurDir => {}
132            Component::ParentDir | Component::RootDir | Component::Prefix(_) => {
133                return Err(KnowledgeApiError::BadRequest {
134                    message: format!("invalid store path: {path}"),
135                });
136            }
137        }
138    }
139
140    Ok(parts.join("/"))
141}
142
143impl StakpakApiClient {
144    /// Resolve the account name used as the cache root.
145    /// Successful resolutions are memoized for the lifetime of the client.
146    /// Failures are negatively cached for [`ACCOUNT_RESOLVE_BACKOFF`] so a
147    /// transient `/v1/account` outage doesn't turn every cached read into
148    /// two round-trips.
149    ///
150    /// Returns `None` when resolution is currently unavailable. Callers
151    /// should treat this as "cache disabled" and proceed.
152    async fn resolve_cache_account(&self) -> Option<String> {
153        // Fast path under the lock: reuse a prior result if we have one.
154        {
155            let state = self.account_name.lock().await;
156            match &*state {
157                AccountCacheState::Resolved(name) => return Some(name.clone()),
158                AccountCacheState::Failed { until } if Instant::now() < *until => return None,
159                _ => {}
160            }
161        }
162
163        let url = format!("{}/v1/account", self.base_url);
164        let resolved: Option<String> = match self.client.get(&url).send().await {
165            Ok(response) if response.status().is_success() => {
166                match response.json::<GetMyAccountResponse>().await {
167                    Ok(account) => Some(match account.scope {
168                        Some(scope) => scope.name,
169                        None => account.username,
170                    }),
171                    Err(e) => {
172                        debug!("knowledge cache: failed to parse account: {}", e);
173                        None
174                    }
175                }
176            }
177            Ok(response) => {
178                debug!(
179                    "knowledge cache: /v1/account returned {}",
180                    response.status()
181                );
182                None
183            }
184            Err(e) => {
185                debug!("knowledge cache: failed to fetch account: {}", e);
186                None
187            }
188        };
189
190        let mut state = self.account_name.lock().await;
191        match resolved {
192            Some(name) => {
193                // Another task may have raced us to a Resolved value; either
194                // is fine since both came from the same endpoint. Last writer
195                // wins.
196                *state = AccountCacheState::Resolved(name.clone());
197                Some(name)
198            }
199            None => {
200                // Don't clobber a Resolved value that arrived while we were
201                // failing in parallel.
202                if !matches!(&*state, AccountCacheState::Resolved(_)) {
203                    *state = AccountCacheState::Failed {
204                        until: Instant::now() + ACCOUNT_RESOLVE_BACKOFF,
205                    };
206                }
207                None
208            }
209        }
210    }
211
212    /// Read a knowledge file. Uses the on-disk cache at
213    /// `~/.stakpak/remote-knowledge/<account>/<path>` together with the
214    /// server's `If-None-Match` support to avoid re-downloading unchanged
215    /// content.
216    pub async fn read_knowledge_file(&self, path: &str) -> Result<Vec<u8>, KnowledgeApiError> {
217        self.read_knowledge_file_inner(path, false).await
218    }
219
220    /// Read at most the first `max_bytes` of a knowledge file. The server
221    /// supports a `peek` query parameter that returns a compact preview; if
222    /// the response exceeds `max_bytes` we truncate client-side.
223    ///
224    /// Peek requests bypass the on-disk cache because they share the path
225    /// with full reads but return a different body.
226    pub async fn peek_knowledge_file(
227        &self,
228        path: &str,
229        max_bytes: usize,
230    ) -> Result<Vec<u8>, KnowledgeApiError> {
231        let mut bytes = self.read_knowledge_file_inner(path, true).await?;
232        if bytes.len() > max_bytes {
233            bytes.truncate(max_bytes);
234        }
235        Ok(bytes)
236    }
237
238    async fn read_knowledge_file_inner(
239        &self,
240        path: &str,
241        peek_only: bool,
242    ) -> Result<Vec<u8>, KnowledgeApiError> {
243        let normalized_path = normalize_knowledge_path(path)?;
244        let encoded_path = encode_path_segments(&normalized_path);
245        let url = format!("{}/v1/knowledge/{}", self.base_url, encoded_path);
246
247        // Cache is only consulted for full reads. Peek bodies are different
248        // content for the same path - mixing them would corrupt the cache.
249        let cache_target: Option<PathBuf> = if peek_only {
250            None
251        } else {
252            self.resolve_cache_account()
253                .await
254                .and_then(|account| cache::cached_path(&account, &normalized_path))
255        };
256
257        let cached = match &cache_target {
258            Some(p) => cache::read_cached(p).await,
259            None => None,
260        };
261
262        let mut request = self.client.get(&url);
263        if peek_only {
264            request = request.query(&[("peek", "true")]);
265        }
266        if let Some((_, etag)) = &cached {
267            request = request.header(header::IF_NONE_MATCH, etag.as_str());
268        }
269        let response = request.send().await?;
270
271        match response.status() {
272            StatusCode::NOT_MODIFIED => match cached {
273                Some((bytes, _)) => Ok(bytes),
274                // We only set `If-None-Match` when we had a cached entry, so
275                // a 304 here means a proxy or middlebox injected the header.
276                // Surface a transport error rather than panicking; callers
277                // will retry without the cache on the next request.
278                None => Err(KnowledgeApiError::Transport {
279                    message: "received 304 Not Modified without sending If-None-Match".into(),
280                }),
281            },
282            status if status.is_success() => {
283                let bytes = response.bytes().await?.to_vec();
284                if let Some(target) = cache_target.as_ref() {
285                    // Best-effort cache write; never fails the request.
286                    cache::write_cached_atomic(target, &bytes).await;
287                }
288                Ok(bytes)
289            }
290            StatusCode::NOT_FOUND => {
291                // Server says it's gone - evict any stale local copy so we
292                // don't keep sending stale ETags for it.
293                if let Some(target) = cache_target.as_ref() {
294                    cache::evict_cached(target).await;
295                }
296                Err(Self::knowledge_error_from_response(response).await)
297            }
298            _ => Err(Self::knowledge_error_from_response(response).await),
299        }
300    }
301
302    /// Cheap existence check using HTTP HEAD. Does not transfer the body.
303    pub async fn knowledge_file_exists(&self, path: &str) -> Result<bool, KnowledgeApiError> {
304        let normalized_path = normalize_knowledge_path(path)?;
305        let encoded_path = encode_path_segments(&normalized_path);
306        let url = format!("{}/v1/knowledge/{}", self.base_url, encoded_path);
307        let response = self.client.head(&url).send().await?;
308
309        let status = response.status();
310        if status.is_success() {
311            Ok(true)
312        } else if status == StatusCode::NOT_FOUND {
313            Ok(false)
314        } else {
315            Err(Self::knowledge_error_from_response(response).await)
316        }
317    }
318
319    /// List knowledge files with optional filtering.
320    /// Bypasses the on-disk cache (no ETag mechanism for list responses).
321    pub async fn list_knowledge_files(
322        &self,
323        query: &ListKnowledgeFilesQuery,
324    ) -> Result<ListKnowledgeFilesResponse, KnowledgeApiError> {
325        let normalized_path = query
326            .path
327            .as_deref()
328            .map(normalize_knowledge_path)
329            .transpose()?;
330        let normalized_query = ListKnowledgeFilesQuery {
331            path: normalized_path,
332            glob: query.glob.clone(),
333        };
334
335        let url = format!("{}/v1/knowledge", self.base_url);
336        let response = self
337            .client
338            .get(&url)
339            .query(&normalized_query)
340            .send()
341            .await?;
342        self.handle_knowledge_response(response).await
343    }
344
345    /// Create a new knowledge file. Returns `Conflict` if a file already
346    /// exists at the target path.
347    ///
348    /// The cache is not populated here: the local cache only holds bodies
349    /// that came from a `GET /v1/knowledge/...` so we know the cached SHA
350    /// matches the server's ETag. The next read will populate it.
351    pub async fn create_knowledge_file(
352        &self,
353        path: &str,
354        content: &[u8],
355    ) -> Result<CreateKnowledgeFileResponse, KnowledgeApiError> {
356        let normalized_path = normalize_knowledge_path(path)?;
357        let encoded_path = encode_path_segments(&normalized_path);
358        let url = format!("{}/v1/knowledge/{}", self.base_url, encoded_path);
359        let response = self
360            .client
361            .post(&url)
362            .header(header::CONTENT_TYPE, "application/octet-stream")
363            .body(content.to_vec())
364            .send()
365            .await?;
366        self.handle_knowledge_response(response).await
367    }
368
369    /// Overwrite an existing knowledge file (or create if not exists).
370    ///
371    /// The cache is not populated here. Any stale local copy will be
372    /// revalidated on the next read: `If-None-Match` will miss against the
373    /// new server ETag and the client will refetch + replace the cached
374    /// body.
375    pub async fn overwrite_knowledge_file(
376        &self,
377        path: &str,
378        content: &[u8],
379    ) -> Result<UpdateKnowledgeFileResponse, KnowledgeApiError> {
380        let normalized_path = normalize_knowledge_path(path)?;
381        let encoded_path = encode_path_segments(&normalized_path);
382        let url = format!("{}/v1/knowledge/{}", self.base_url, encoded_path);
383        let response = self
384            .client
385            .put(&url)
386            .header(header::CONTENT_TYPE, "application/octet-stream")
387            .body(content.to_vec())
388            .send()
389            .await?;
390        self.handle_knowledge_response(response).await
391    }
392
393    /// Delete a knowledge file or directory. On success, evicts the matching
394    /// cache entry (file or directory tree).
395    pub async fn delete_knowledge_file(&self, path: &str) -> Result<(), KnowledgeApiError> {
396        let normalized_path = normalize_knowledge_path(path)?;
397        let encoded_path = encode_path_segments(&normalized_path);
398        let url = format!("{}/v1/knowledge/{}", self.base_url, encoded_path);
399        let response = self.client.delete(&url).send().await?;
400
401        if !response.status().is_success() {
402            return Err(Self::knowledge_error_from_response(response).await);
403        }
404
405        if let Some(account) = self.resolve_cache_account().await
406            && let Some(target) = cache::cached_path(&account, &normalized_path)
407        {
408            cache::evict_cached(&target).await;
409        }
410
411        Ok(())
412    }
413
414    /// Decode a JSON body on success; otherwise convert the response into a
415    /// typed [`KnowledgeApiError`].
416    async fn handle_knowledge_response<T: DeserializeOwned>(
417        &self,
418        response: Response,
419    ) -> Result<T, KnowledgeApiError> {
420        if !response.status().is_success() {
421            return Err(Self::knowledge_error_from_response(response).await);
422        }
423        let url = response.url().to_string();
424        let status = response.status();
425        let body = response
426            .text()
427            .await
428            .map_err(|e| KnowledgeApiError::Transport {
429                message: format!(
430                    "Failed to read response body from {} (status {}): {}",
431                    url, status, e
432                ),
433            })?;
434        serde_json::from_str(&body).map_err(|e| {
435            let truncated_body: String = body.chars().take(500).collect();
436            KnowledgeApiError::Transport {
437                message: format!(
438                    "Failed to decode response from {} (status {}): {} | body: {}",
439                    url, status, e, truncated_body
440                ),
441            }
442        })
443    }
444
445    /// Map a non-success HTTP response into a [`KnowledgeApiError`], using
446    /// the structured `ApiError` payload when present so we can surface the
447    /// server-provided message verbatim.
448    async fn knowledge_error_from_response(response: Response) -> KnowledgeApiError {
449        let status = response.status();
450        let body = response.text().await.unwrap_or_default();
451
452        let message = serde_json::from_str::<ApiError>(&body)
453            .map(|api| api.error.message)
454            .unwrap_or_else(|_| {
455                if body.is_empty() {
456                    status.canonical_reason().unwrap_or("error").to_string()
457                } else {
458                    body.clone()
459                }
460            });
461
462        match status {
463            StatusCode::NOT_FOUND => KnowledgeApiError::NotFound { message },
464            StatusCode::CONFLICT => KnowledgeApiError::Conflict { message },
465            StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
466                KnowledgeApiError::Forbidden { message }
467            }
468            StatusCode::BAD_REQUEST => KnowledgeApiError::BadRequest { message },
469            other => KnowledgeApiError::Http {
470                status: other,
471                message,
472            },
473        }
474    }
475}
476
#[cfg(test)]
mod tests {
    use super::{KnowledgeApiError, encode_path_segments, normalize_knowledge_path};

    /// Asserts that `path` is refused with a `BadRequest` error.
    fn assert_rejected(path: &str) {
        let outcome = normalize_knowledge_path(path);
        assert!(matches!(
            outcome,
            Err(KnowledgeApiError::BadRequest { .. })
        ));
    }

    // `..` anywhere in the path must be refused outright.
    #[test]
    fn normalize_path_rejects_parent_components() {
        assert_rejected("docs/../secrets.txt");
    }

    // A rooted path must never reach the server or the cache.
    #[test]
    fn normalize_path_rejects_absolute_paths() {
        assert_rejected("/etc/passwd");
    }

    // `.` components and repeated separators vanish from the result.
    #[test]
    fn normalize_path_removes_dot_and_empty_segments() {
        let outcome = normalize_knowledge_path("docs//./guides///intro.md");
        assert_eq!(outcome.unwrap(), "docs/guides/intro.md");
    }

    // Spaces are escaped inside each segment while `/` stays literal.
    #[test]
    fn encode_keeps_separators_and_encodes_each_segment() {
        assert_eq!(
            encode_path_segments("team notes/2026 plan.md"),
            "team%20notes/2026%20plan.md"
        );
    }
}
504}