// tuitbot_core/source/mod.rs

//! Content source provider abstraction.
//!
//! Defines the `ContentSourceProvider` trait that both local filesystem and
//! remote (e.g. Google Drive) sources implement. The trait covers scanning
//! for changed files and reading content — the Watchtower orchestrates
//! watching vs polling based on source type.
7
8pub mod google_drive;
9pub mod local_fs;
10
11#[cfg(test)]
12mod tests;
13
14use async_trait::async_trait;
15
// ---------------------------------------------------------------------------
// Error type
// ---------------------------------------------------------------------------
19
20/// Errors from content source providers.
21#[derive(Debug, thiserror::Error)]
22pub enum SourceError {
23    #[error("IO error: {0}")]
24    Io(#[from] std::io::Error),
25
26    #[error("network error: {0}")]
27    Network(String),
28
29    #[error("auth error: {0}")]
30    Auth(String),
31
32    #[error("not found: {0}")]
33    NotFound(String),
34}
35
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
39
/// Metadata about a file discovered by a provider scan.
///
/// The type is plain data (four owned strings), so it derives equality and
/// hashing in addition to `Debug`/`Clone`. That lets callers compare scan
/// results, deduplicate them in a `HashSet`, or use `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SourceFile {
    /// Stable identifier (relative path for local, `gdrive://<id>/<name>` for Drive).
    pub provider_id: String,
    /// Human-readable display name.
    pub display_name: String,
    /// SHA-256 content hash.
    pub content_hash: String,
    /// RFC 3339 modification timestamp.
    pub modified_at: String,
}
52
// ---------------------------------------------------------------------------
// Trait
// ---------------------------------------------------------------------------
56
57/// Abstraction over content source backends.
58///
59/// Local sources use filesystem walking; remote sources use API polling.
60/// Both produce `SourceFile` metadata and string content that feeds
61/// into the shared Watchtower ingest pipeline.
62#[async_trait]
63pub trait ContentSourceProvider: Send + Sync {
64    /// Returns the source type identifier (e.g. `"local_fs"`, `"google_drive"`).
65    fn source_type(&self) -> &str;
66
67    /// Scan for files that changed since `since_cursor`.
68    ///
69    /// - `since_cursor`: opaque sync cursor from the last scan (RFC 3339 timestamp
70    ///   or provider-specific token). `None` means full scan.
71    /// - `patterns`: glob patterns to filter files (e.g. `["*.md", "*.txt"]`).
72    ///
73    /// Returns metadata for each changed file. The caller is responsible for
74    /// calling `read_content` on files that need ingestion.
75    async fn scan_for_changes(
76        &self,
77        since_cursor: Option<&str>,
78        patterns: &[String],
79    ) -> Result<Vec<SourceFile>, SourceError>;
80
81    /// Read the full text content of a file by its provider ID.
82    async fn read_content(&self, file_id: &str) -> Result<String, SourceError>;
83}