tuitbot_core/source/mod.rs
1//! Content source provider abstraction.
2//!
3//! Defines the `ContentSourceProvider` trait that both local filesystem and
4//! remote (e.g. Google Drive) sources implement. The trait covers scanning
5//! for changed files and reading content — the Watchtower orchestrates
6//! watching vs polling based on source type.
7
8pub mod connector;
9pub mod google_drive;
10pub mod local_fs;
11
12#[cfg(test)]
13mod tests;
14
15use async_trait::async_trait;
16
17// ---------------------------------------------------------------------------
18// Error type
19// ---------------------------------------------------------------------------
20
21/// Errors from content source providers.
22#[derive(Debug, thiserror::Error)]
23pub enum SourceError {
24 #[error("IO error: {0}")]
25 Io(#[from] std::io::Error),
26
27 #[error("network error: {0}")]
28 Network(String),
29
30 #[error("auth error: {0}")]
31 Auth(String),
32
33 #[error("not found: {0}")]
34 NotFound(String),
35
36 #[error("connection broken (id={connection_id}): {reason}")]
37 ConnectionBroken { connection_id: i64, reason: String },
38}
39
40// ---------------------------------------------------------------------------
41// Types
42// ---------------------------------------------------------------------------
43
/// Metadata about a file discovered by a provider scan.
///
/// Besides `Debug` and `Clone`, the type derives `PartialEq`/`Eq` so two
/// scan results can be compared directly (for example with `assert_eq!`)
/// rather than field by field. Equality is plain field-wise string
/// comparison.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SourceFile {
    /// Stable identifier (relative path for local, `gdrive://<id>/<name>` for Drive).
    pub provider_id: String,
    /// Human-readable display name.
    pub display_name: String,
    /// SHA-256 content hash.
    pub content_hash: String,
    /// RFC 3339 modification timestamp.
    pub modified_at: String,
}
56
57// ---------------------------------------------------------------------------
58// Trait
59// ---------------------------------------------------------------------------
60
61/// Abstraction over content source backends.
62///
63/// Local sources use filesystem walking; remote sources use API polling.
64/// Both produce `SourceFile` metadata and string content that feeds
65/// into the shared Watchtower ingest pipeline.
66#[async_trait]
67pub trait ContentSourceProvider: Send + Sync {
68 /// Returns the source type identifier (e.g. `"local_fs"`, `"google_drive"`).
69 fn source_type(&self) -> &str;
70
71 /// Scan for files that changed since `since_cursor`.
72 ///
73 /// - `since_cursor`: opaque sync cursor from the last scan (RFC 3339 timestamp
74 /// or provider-specific token). `None` means full scan.
75 /// - `patterns`: glob patterns to filter files (e.g. `["*.md", "*.txt"]`).
76 ///
77 /// Returns metadata for each changed file. The caller is responsible for
78 /// calling `read_content` on files that need ingestion.
79 async fn scan_for_changes(
80 &self,
81 since_cursor: Option<&str>,
82 patterns: &[String],
83 ) -> Result<Vec<SourceFile>, SourceError>;
84
85 /// Read the full text content of a file by its provider ID.
86 async fn read_content(&self, file_id: &str) -> Result<String, SourceError>;
87}