Skip to main content

tuitbot_core/source/
mod.rs

1//! Content source provider abstraction.
2//!
3//! Defines the `ContentSourceProvider` trait that both local filesystem and
4//! remote (e.g. Google Drive) sources implement. The trait covers scanning
5//! for changed files and reading content — the Watchtower orchestrates
6//! watching vs polling based on source type.
7
8pub mod connector;
9pub mod google_drive;
10pub mod local_fs;
11
12#[cfg(test)]
13mod tests;
14
15use async_trait::async_trait;
16
17// ---------------------------------------------------------------------------
18// Error type
19// ---------------------------------------------------------------------------
20
/// Errors from content source providers.
///
/// This is the error type returned by the fallible methods of
/// `ContentSourceProvider`. The `Display` text of each variant comes from
/// its `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum SourceError {
    /// An underlying I/O failure.
    ///
    /// `#[from]` generates `From<std::io::Error>`, so `?` on an
    /// `std::io::Result` converts into this variant.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// A network failure; the payload is a free-form description.
    #[error("network error: {0}")]
    Network(String),

    /// An auth failure; the payload is a free-form description.
    #[error("auth error: {0}")]
    Auth(String),

    /// Something that was looked up does not exist.
    // NOTE(review): the payload presumably names the missing item — confirm
    // against the call sites.
    #[error("not found: {0}")]
    NotFound(String),

    /// A connection is no longer usable.
    ///
    /// `connection_id` identifies the connection and `reason` describes
    /// why it is considered broken.
    // NOTE(review): `connection_id` is presumably the id of a stored
    // connection record — confirm against the `connector` module.
    #[error("connection broken (id={connection_id}): {reason}")]
    ConnectionBroken { connection_id: i64, reason: String },
}
39
40// ---------------------------------------------------------------------------
41// Types
42// ---------------------------------------------------------------------------
43
/// Metadata about a file discovered by a provider scan.
///
/// Holds identity and change-detection data only; the file's content is
/// fetched separately through `ContentSourceProvider::read_content`.
///
/// Besides `Debug` and `Clone`, the type derives `PartialEq`, `Eq` and
/// `Hash`, so scan results can be compared directly (including with
/// `assert_eq!` in tests) and stored in a `HashSet` or used as map keys.
/// Two values are equal only when all four fields are equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SourceFile {
    /// Stable identifier (relative path for local, `gdrive://<id>/<name>` for Drive).
    pub provider_id: String,
    /// Human-readable display name.
    pub display_name: String,
    /// SHA-256 content hash.
    pub content_hash: String,
    /// RFC 3339 modification timestamp.
    pub modified_at: String,
}
56
57// ---------------------------------------------------------------------------
58// Trait
59// ---------------------------------------------------------------------------
60
/// Abstraction over content source backends.
///
/// Local sources use filesystem walking; remote sources use API polling.
/// Both produce `SourceFile` metadata and string content that feeds
/// into the shared Watchtower ingest pipeline.
///
/// Implementors must be `Send + Sync`. The `#[async_trait]` attribute is
/// what allows the `async fn` methods to be declared on the trait.
#[async_trait]
pub trait ContentSourceProvider: Send + Sync {
    /// Returns the source type identifier (e.g. `"local_fs"`, `"google_drive"`).
    fn source_type(&self) -> &str;

    /// Scan for files that changed since `since_cursor`.
    ///
    /// - `since_cursor`: opaque sync cursor from the last scan (RFC 3339 timestamp
    ///   or provider-specific token). `None` means full scan.
    /// - `patterns`: glob patterns to filter files (e.g. `["*.md", "*.txt"]`).
    ///
    /// Returns metadata for each changed file. The caller is responsible for
    /// calling `read_content` on files that need ingestion.
    ///
    /// # Errors
    ///
    /// Returns a `SourceError` when the scan fails; which variants can occur
    /// depends on the implementing provider.
    async fn scan_for_changes(
        &self,
        since_cursor: Option<&str>,
        patterns: &[String],
    ) -> Result<Vec<SourceFile>, SourceError>;

    /// Read the full text content of a file by its provider ID.
    ///
    /// - `file_id`: the provider ID of the file to read.
    ///
    /// # Errors
    ///
    /// Returns a `SourceError` when the content cannot be read; which
    /// variants can occur depends on the implementing provider.
    // NOTE(review): `file_id` is presumably the `provider_id` of a
    // `SourceFile` returned by `scan_for_changes` — confirm against the
    // implementations.
    async fn read_content(&self, file_id: &str) -> Result<String, SourceError>;
}