tuitbot_core/source/mod.rs
1//! Content source provider abstraction.
2//!
3//! Defines the `ContentSourceProvider` trait that both local filesystem and
4//! remote (e.g. Google Drive) sources implement. The trait covers scanning
5//! for changed files and reading content — the Watchtower orchestrates
6//! watching vs polling based on source type.
7
8pub mod google_drive;
9pub mod local_fs;
10
11#[cfg(test)]
12mod tests;
13
14use async_trait::async_trait;
15
16// ---------------------------------------------------------------------------
17// Error type
18// ---------------------------------------------------------------------------
19
20/// Errors from content source providers.
21#[derive(Debug, thiserror::Error)]
22pub enum SourceError {
23 #[error("IO error: {0}")]
24 Io(#[from] std::io::Error),
25
26 #[error("network error: {0}")]
27 Network(String),
28
29 #[error("auth error: {0}")]
30 Auth(String),
31
32 #[error("not found: {0}")]
33 NotFound(String),
34}
35
36// ---------------------------------------------------------------------------
37// Types
38// ---------------------------------------------------------------------------
39
/// Metadata about a file discovered by a provider scan.
///
/// This is a plain value type made of owned strings. Besides `Debug` and
/// `Clone` it derives `PartialEq`, `Eq` and `Hash`, so scan results can be
/// compared with `==` / `assert_eq!` and stored in hash-based collections
/// (for example to de-duplicate entries). Equality is field-wise: two values
/// are equal only when all four fields match.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SourceFile {
    /// Stable identifier (relative path for local, `gdrive://<id>/<name>` for Drive).
    pub provider_id: String,
    /// Human-readable display name.
    pub display_name: String,
    /// SHA-256 content hash.
    pub content_hash: String,
    /// RFC 3339 modification timestamp.
    pub modified_at: String,
}
52
53// ---------------------------------------------------------------------------
54// Trait
55// ---------------------------------------------------------------------------
56
57/// Abstraction over content source backends.
58///
59/// Local sources use filesystem walking; remote sources use API polling.
60/// Both produce `SourceFile` metadata and string content that feeds
61/// into the shared Watchtower ingest pipeline.
62#[async_trait]
63pub trait ContentSourceProvider: Send + Sync {
64 /// Returns the source type identifier (e.g. `"local_fs"`, `"google_drive"`).
65 fn source_type(&self) -> &str;
66
67 /// Scan for files that changed since `since_cursor`.
68 ///
69 /// - `since_cursor`: opaque sync cursor from the last scan (RFC 3339 timestamp
70 /// or provider-specific token). `None` means full scan.
71 /// - `patterns`: glob patterns to filter files (e.g. `["*.md", "*.txt"]`).
72 ///
73 /// Returns metadata for each changed file. The caller is responsible for
74 /// calling `read_content` on files that need ingestion.
75 async fn scan_for_changes(
76 &self,
77 since_cursor: Option<&str>,
78 patterns: &[String],
79 ) -> Result<Vec<SourceFile>, SourceError>;
80
81 /// Read the full text content of a file by its provider ID.
82 async fn read_content(&self, file_id: &str) -> Result<String, SourceError>;
83}