Skip to main content

orbok_db/repo/
files.rs

1//! File catalog repository (RFC-002 §7.3, RFC-004).
2//!
3//! The scanner drives these operations: upsert on discovery, metadata
4//! comparison for change detection, missing-marking for unseen files.
5//! File catalog records are persistent catalog data (RFC-001 §5.3) and
6//! survive index cleanup.
7
8use crate::catalog::{Catalog, db_err};
9use orbok_core::{FileId, FileStatus, OrbokError, OrbokResult, SourceId, now_iso8601};
10use rusqlite::{Row, params};
11
/// A cataloged file.
///
/// One decoded row of the `files` table, as produced by `row_to_record`.
#[derive(Debug, Clone)]
pub struct FileRecord {
    /// Identifier of the catalog row (`file_id` column).
    pub file_id: FileId,
    /// Source the file belongs to; together with `canonical_path` it forms
    /// the identity key used by `get_by_path`.
    pub source_id: SourceId,
    /// Stored verbatim from `NewFile::original_path` at insert time.
    pub original_path: String,
    /// Second half of the identity key (see `source_id`).
    pub canonical_path: String,
    /// Stored verbatim from `NewFile::display_path` at insert time.
    pub display_path: String,
    /// File extension, if one was recorded.
    pub extension: Option<String>,
    /// Size in bytes; persisted in SQLite as a signed 64-bit integer.
    pub file_size_bytes: u64,
    /// Modification time observed on disk, if available. Stored as text;
    /// the exact format is not visible in this module.
    pub modified_at: Option<String>,
    /// Platform-specific file key, if one was observed.
    /// NOTE(review): presumably an inode / file-index style value — confirm.
    pub platform_file_key: Option<String>,
    /// Content hash, if one has been supplied.
    pub content_hash: Option<String>,
    /// Algorithm behind `content_hash`; this module writes `sha256`
    /// whenever a hash is supplied.
    pub hash_algorithm: Option<String>,
    /// Current catalog status of the file.
    pub file_status: FileStatus,
    /// Timestamp of the last scan that observed the file; compared against
    /// the cutoff in `mark_missing_unseen`.
    pub last_seen_at: String,
    /// Timestamp of the last indexing run, if any. Only read here; no
    /// statement in this repository writes it.
    pub last_indexed_at: Option<String>,
}
30
/// Parameters for inserting a newly discovered file.
///
/// Consumed by `FileRepository::insert`, which generates the `file_id` and
/// all timestamps itself.
#[derive(Debug, Clone)]
pub struct NewFile {
    /// Source the file was discovered under.
    pub source_id: SourceId,
    /// Written to the `original_path` column as-is.
    pub original_path: String,
    /// Written to the `canonical_path` column; part of the identity key
    /// used by `get_by_path`.
    pub canonical_path: String,
    /// Written to the `display_path` column as-is.
    pub display_path: String,
    /// File extension, if any.
    pub extension: Option<String>,
    /// Size, modification time, platform key and optional hash observed
    /// on disk.
    pub metadata: ObservedMetadata,
    /// Initial value of the `file_status` column.
    pub status: FileStatus,
}
42
/// Metadata observed on disk during a scan (RFC-004 §9.1 fast check).
///
/// `Default` yields a zero size and `None` for every optional field.
#[derive(Debug, Clone, Default)]
pub struct ObservedMetadata {
    /// Size in bytes as observed on disk.
    pub file_size_bytes: u64,
    /// Modification time as observed on disk, if available.
    pub modified_at: Option<String>,
    /// Platform-specific file key, if available.
    pub platform_file_key: Option<String>,
    /// Content hash, if one was computed. When `None`, `update_observed`
    /// leaves the previously stored hash in place (SQL `COALESCE`).
    pub content_hash: Option<String>,
}
51
// Column list shared by every `SELECT` that feeds `row_to_record`.
// `row_to_record` reads columns by position (0..=13), so the order here
// must stay in sync with the indices used there.
const COLUMNS: &str = "file_id, source_id, original_path, canonical_path, display_path, \
     extension, file_size_bytes, modified_at, platform_file_key, content_hash, hash_algorithm, \
     file_status, last_seen_at, last_indexed_at";
55
/// Repository over the `files` table.
///
/// Holds only a borrowed catalog; each method calls `catalog.lock()` to
/// reach the connection it runs its statement on.
pub struct FileRepository<'a> {
    /// Catalog whose `lock()` yields the connection used for all statements.
    catalog: &'a Catalog,
}
60
61impl<'a> FileRepository<'a> {
62    pub fn new(catalog: &'a Catalog) -> Self {
63        Self { catalog }
64    }
65
66    /// Look up a file by its identity key (source, canonical path).
67    pub fn get_by_path(
68        &self,
69        source_id: &SourceId,
70        canonical_path: &str,
71    ) -> OrbokResult<Option<FileRecord>> {
72        let conn = self.catalog.lock();
73        let mut stmt = conn
74            .prepare(&format!(
75                "SELECT {COLUMNS} FROM files WHERE source_id = ?1 AND canonical_path = ?2"
76            ))
77            .map_err(db_err)?;
78        let mut rows = stmt
79            .query_map(params![source_id.as_str(), canonical_path], row_to_record)
80            .map_err(db_err)?;
81        match rows.next() {
82            Some(r) => Ok(Some(r.map_err(db_err)??)),
83            None => Ok(None),
84        }
85    }
86
87    /// Insert a newly discovered file.
88    pub fn insert(&self, new: NewFile) -> OrbokResult<FileRecord> {
89        let id = FileId::generate();
90        let now = now_iso8601();
91        let hash_algorithm = new.metadata.content_hash.as_ref().map(|_| "sha256");
92        let conn = self.catalog.lock();
93        conn.execute(
94            "INSERT INTO files (file_id, source_id, original_path, canonical_path, display_path, \
95             extension, file_size_bytes, modified_at, platform_file_key, content_hash, \
96             hash_algorithm, file_status, last_seen_at, last_scanned_at, created_at, updated_at) \
97             VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?13,?13,?13)",
98            params![
99                id.as_str(),
100                new.source_id.as_str(),
101                new.original_path,
102                new.canonical_path,
103                new.display_path,
104                new.extension,
105                new.metadata.file_size_bytes as i64,
106                new.metadata.modified_at,
107                new.metadata.platform_file_key,
108                new.metadata.content_hash,
109                hash_algorithm,
110                new.status.as_str(),
111                now,
112            ],
113        )
114        .map_err(db_err)?;
115        drop(conn);
116        self.get_by_path_id(&id)
117    }
118
119    fn get_by_path_id(&self, id: &FileId) -> OrbokResult<FileRecord> {
120        let conn = self.catalog.lock();
121        let mut stmt = conn
122            .prepare(&format!("SELECT {COLUMNS} FROM files WHERE file_id = ?1"))
123            .map_err(db_err)?;
124        let mut rows = stmt
125            .query_map(params![id.as_str()], row_to_record)
126            .map_err(db_err)?;
127        match rows.next() {
128            Some(r) => r.map_err(db_err)?,
129            None => Err(OrbokError::FileNotFound),
130        }
131    }
132
133    /// Touch a file confirmed unchanged by the metadata check.
134    pub fn touch_seen(&self, id: &FileId) -> OrbokResult<()> {
135        let now = now_iso8601();
136        let conn = self.catalog.lock();
137        conn.execute(
138            "UPDATE files SET last_seen_at = ?2, last_scanned_at = ?2, updated_at = ?2 \
139             WHERE file_id = ?1",
140            params![id.as_str(), now],
141        )
142        .map_err(db_err)?;
143        Ok(())
144    }
145
146    /// Record changed on-disk metadata and the resulting status
147    /// transition (RFC-004 §12 stale detection).
148    pub fn update_observed(
149        &self,
150        id: &FileId,
151        metadata: &ObservedMetadata,
152        status: FileStatus,
153    ) -> OrbokResult<()> {
154        let now = now_iso8601();
155        let hash_algorithm = metadata.content_hash.as_ref().map(|_| "sha256");
156        let conn = self.catalog.lock();
157        conn.execute(
158            "UPDATE files SET file_size_bytes = ?2, modified_at = ?3, platform_file_key = ?4, \
159             content_hash = COALESCE(?5, content_hash), \
160             hash_algorithm = COALESCE(?6, hash_algorithm), file_status = ?7, \
161             last_seen_at = ?8, last_scanned_at = ?8, updated_at = ?8 WHERE file_id = ?1",
162            params![
163                id.as_str(),
164                metadata.file_size_bytes as i64,
165                metadata.modified_at,
166                metadata.platform_file_key,
167                metadata.content_hash,
168                hash_algorithm,
169                status.as_str(),
170                now,
171            ],
172        )
173        .map_err(db_err)?;
174        Ok(())
175    }
176
177    /// Set status only (e.g. permission_denied observed mid-scan).
178    pub fn set_status(&self, id: &FileId, status: FileStatus) -> OrbokResult<()> {
179        let conn = self.catalog.lock();
180        conn.execute(
181            "UPDATE files SET file_status = ?2, updated_at = ?3 WHERE file_id = ?1",
182            params![id.as_str(), status.as_str(), now_iso8601()],
183        )
184        .map_err(db_err)?;
185        Ok(())
186    }
187
188    /// RFC-004 §11: mark files of `source_id` not seen since `cutoff`
189    /// as Missing — never Deleted (drives may be disconnected). Returns
190    /// the number of newly missing files.
191    pub fn mark_missing_unseen(&self, source_id: &SourceId, cutoff: &str) -> OrbokResult<u64> {
192        let conn = self.catalog.lock();
193        let n = conn
194            .execute(
195                "UPDATE files SET file_status = 'missing', updated_at = ?3 \
196                 WHERE source_id = ?1 AND last_seen_at < ?2 \
197                 AND file_status NOT IN ('missing', 'deleted')",
198                params![source_id.as_str(), cutoff, now_iso8601()],
199            )
200            .map_err(db_err)?;
201        Ok(n as u64)
202    }
203
204    /// Status counts for one source (Indexing/Sources view summaries).
205    pub fn count_by_status(&self, source_id: &SourceId) -> OrbokResult<Vec<(FileStatus, u64)>> {
206        let conn = self.catalog.lock();
207        let mut stmt = conn
208            .prepare(
209                "SELECT file_status, COUNT(*) FROM files WHERE source_id = ?1 GROUP BY file_status",
210            )
211            .map_err(db_err)?;
212        let rows = stmt
213            .query_map(params![source_id.as_str()], |row| {
214                Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
215            })
216            .map_err(db_err)?;
217        let mut out = Vec::new();
218        for row in rows {
219            let (status, count) = row.map_err(db_err)?;
220            out.push((FileStatus::parse(&status)?, count as u64));
221        }
222        Ok(out)
223    }
224}
225
226fn row_to_record(row: &Row<'_>) -> rusqlite::Result<OrbokResult<FileRecord>> {
227    let status: String = row.get(11)?;
228    let size: i64 = row.get(6)?;
229    Ok((|| {
230        Ok(FileRecord {
231            file_id: FileId::from_string(row.get::<_, String>(0).map_err(db_err)?),
232            source_id: SourceId::from_string(row.get::<_, String>(1).map_err(db_err)?),
233            original_path: row.get(2).map_err(db_err)?,
234            canonical_path: row.get(3).map_err(db_err)?,
235            display_path: row.get(4).map_err(db_err)?,
236            extension: row.get(5).map_err(db_err)?,
237            file_size_bytes: size as u64,
238            modified_at: row.get(7).map_err(db_err)?,
239            platform_file_key: row.get(8).map_err(db_err)?,
240            content_hash: row.get(9).map_err(db_err)?,
241            hash_algorithm: row.get(10).map_err(db_err)?,
242            file_status: FileStatus::parse(&status)?,
243            last_seen_at: row.get(12).map_err(db_err)?,
244            last_indexed_at: row.get(13).map_err(db_err)?,
245        })
246    })())
247}