Skip to main content

offline_intelligence/memory_db/
all_files_store.rs

1//! All Files store - unlimited storage for all file formats with folder support
2//!
3//! Files are stored with metadata in SQLite and actual content in the all_files folder.
4
5use chrono::{DateTime, Utc};
6use r2d2::Pool;
7use r2d2_sqlite::SqliteConnectionManager;
8use serde::{Deserialize, Serialize};
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11use tracing::info;
12
/// Represents a file or directory in all_files storage.
///
/// Each value mirrors one row of the `all_files` table as it is read back by
/// the store's query methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllFile {
    /// Row id in the `all_files` table.
    pub id: i64,
    /// Entry name as passed to folder creation / upload.
    pub name: String,
    /// `<parent path>/<name>` when a parent is given, otherwise just `name`.
    pub path: String,
    /// Id of the containing directory row; `None` for root-level entries.
    pub parent_id: Option<i64>,
    /// `true` for rows inserted by folder creation, `false` for uploads.
    pub is_directory: bool,
    /// Content location relative to the store directory. Uploads set it to
    /// the same value as `path`; folder creation does not set it.
    pub file_path: Option<String>,
    /// Byte length of the content recorded at upload time.
    pub size_bytes: i64,
    /// MIME type supplied by the uploader, if any.
    pub mime_type: Option<String>,
    /// Creation time, stored as an RFC 3339 string. The readers in this
    /// file substitute the current time when the stored string fails to parse.
    pub created_at: DateTime<Utc>,
    /// Last modification time; same storage format and fallback as `created_at`.
    pub modified_at: DateTime<Utc>,
    /// Time of the most recent recorded access; `None` when absent or unparsable.
    pub last_accessed: Option<DateTime<Utc>>,
    /// Number of recorded accesses.
    pub access_count: i64,
}
29
/// Represents a file tree node with children (for nested display).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllFileTree {
    /// The entry itself; its fields are serialized inline at this level
    /// rather than under a nested `file` key.
    #[serde(flatten)]
    pub file: AllFile,
    /// Child nodes. `None` for files and for directories without children,
    /// in which case the key is left out of the serialized output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub children: Option<Vec<AllFileTree>>,
}
38
/// Store for managing all files in database.
///
/// Metadata lives in the `all_files` table; folders and file contents are
/// created on disk below `all_files_dir`.
pub struct AllFilesStore {
    /// Shared SQLite connection pool; methods check out a connection per call.
    pool: Arc<Pool<SqliteConnectionManager>>,
    /// Root directory under which stored paths are resolved.
    all_files_dir: PathBuf,
}
44
45impl AllFilesStore {
46    /// Create a new all files store
47    pub fn new(pool: Arc<Pool<SqliteConnectionManager>>, all_files_dir: PathBuf) -> Self {
48        if !all_files_dir.exists() {
49            if let Err(e) = std::fs::create_dir_all(&all_files_dir) {
50                tracing::warn!("Failed to create all files directory: {}", e);
51            }
52        }
53        Self {
54            pool,
55            all_files_dir,
56        }
57    }
58
59    /// Get the all files directory path
60    pub fn get_all_files_dir(&self) -> &Path {
61        &self.all_files_dir
62    }
63
64    /// Create a folder
65    pub fn create_folder(&self, parent_id: Option<i64>, name: &str) -> anyhow::Result<AllFile> {
66        let conn = self.pool.get()?;
67
68        let path = if let Some(pid) = parent_id {
69            let parent_path: String =
70                conn.query_row("SELECT path FROM all_files WHERE id = ?1", [pid], |row| {
71                    row.get(0)
72                })?;
73            format!("{}/{}", parent_path, name)
74        } else {
75            name.to_string()
76        };
77
78        let now = chrono::Utc::now().to_rfc3339();
79
80        conn.execute(
81            "INSERT INTO all_files (name, path, parent_id, is_directory, created_at, modified_at)
82             VALUES (?1, ?2, ?3, TRUE, ?4, ?4)",
83            rusqlite::params![name, path, parent_id, now],
84        )?;
85
86        let id = conn.last_insert_rowid();
87
88        let fs_path = self.all_files_dir.join(&path);
89        std::fs::create_dir_all(&fs_path)?;
90
91        info!("Created all_files folder: {} (id: {})", path, id);
92
93        self.get_file(id)
94    }
95
96    /// Upload a file
97    pub fn upload_file(
98        &self,
99        parent_id: Option<i64>,
100        name: &str,
101        content: &[u8],
102        mime_type: Option<&str>,
103    ) -> anyhow::Result<AllFile> {
104        let conn = self.pool.get()?;
105
106        let path = if let Some(pid) = parent_id {
107            let parent_path: String =
108                conn.query_row("SELECT path FROM all_files WHERE id = ?1", [pid], |row| {
109                    row.get(0)
110                })?;
111            format!("{}/{}", parent_path, name)
112        } else {
113            name.to_string()
114        };
115
116        let fs_path = self.all_files_dir.join(&path);
117        if let Some(parent) = fs_path.parent() {
118            std::fs::create_dir_all(parent)?;
119        }
120        std::fs::write(&fs_path, content)?;
121
122        let now = chrono::Utc::now().to_rfc3339();
123        let size = content.len() as i64;
124
125        // Handle NULL parent_id specially for SQLite
126        let existing_id: Option<i64> = if parent_id.is_none() {
127            conn.query_row(
128                "SELECT id FROM all_files WHERE parent_id IS NULL AND name = ?1",
129                [name],
130                |row| row.get(0),
131            )
132            .ok()
133        } else {
134            conn.query_row(
135                "SELECT id FROM all_files WHERE parent_id = ?1 AND name = ?2",
136                rusqlite::params![parent_id, name],
137                |row| row.get(0),
138            )
139            .ok()
140        };
141
142        let id = if let Some(existing) = existing_id {
143            conn.execute(
144                "UPDATE all_files SET file_path = ?1, size_bytes = ?2, mime_type = ?3, modified_at = ?4
145                 WHERE id = ?5",
146                rusqlite::params![path, size, mime_type, now, existing],
147            )?;
148            existing
149        } else {
150            conn.execute(
151                "INSERT INTO all_files (name, path, parent_id, is_directory, file_path, size_bytes, mime_type, created_at, modified_at)
152                 VALUES (?1, ?2, ?3, FALSE, ?2, ?4, ?5, ?6, ?6)",
153                rusqlite::params![name, path, parent_id, size, mime_type, now],
154            )?;
155            conn.last_insert_rowid()
156        };
157
158        info!("Uploaded all_files: {} ({} bytes)", path, size);
159
160        self.get_file(id)
161    }
162
163    /// Upload multiple files with their directory structure
164    pub fn upload_files_with_structure(
165        &self,
166        files: Vec<(Option<String>, String, Vec<u8>)>,
167        parent_id: Option<i64>,
168    ) -> anyhow::Result<Vec<AllFile>>
169    where
170        Option<String>: std::fmt::Debug,
171    {
172        let mut uploaded = Vec::new();
173
174        for (relative_path, filename, content) in files {
175            let path = if let Some(parent_path) = relative_path {
176                if parent_path.is_empty() {
177                    filename.clone()
178                } else {
179                    format!("{}/{}", parent_path, filename)
180                }
181            } else {
182                filename.clone()
183            };
184
185            // Split by both / and \ to handle Windows and Unix paths
186            let parts: Vec<&str> = path.split(['/', '\\']).filter(|s| !s.is_empty()).collect();
187            let mut current_parent_id: Option<i64> = parent_id;
188
189            for (i, part) in parts.iter().enumerate() {
190                let is_last = i == parts.len() - 1;
191                let is_dir = !is_last;
192
193                if is_dir {
194                    // Handle NULL parent_id specially for SQLite
195                    let existing_dir_id: Option<i64> = if current_parent_id.is_none() {
196                        self.pool.get()?.query_row(
197                            "SELECT id FROM all_files WHERE parent_id IS NULL AND name = ?1 AND is_directory = TRUE",
198                            [part],
199                            |row| row.get(0),
200                        ).ok()
201                    } else {
202                        self.pool.get()?.query_row(
203                            "SELECT id FROM all_files WHERE parent_id = ?1 AND name = ?2 AND is_directory = TRUE",
204                            rusqlite::params![current_parent_id, part],
205                            |row| row.get(0),
206                        ).ok()
207                    };
208
209                    current_parent_id = if let Some(dir_id) = existing_dir_id {
210                        Some(dir_id)
211                    } else {
212                        let new_dir = self.create_folder(current_parent_id, part)?;
213                        Some(new_dir.id)
214                    };
215                } else {
216                    let mime_type = mime_guess::from_path(&path).first().map(|m| m.to_string());
217
218                    let file =
219                        self.upload_file(current_parent_id, part, &content, mime_type.as_deref())?;
220                    uploaded.push(file);
221                    break;
222                }
223            }
224        }
225
226        Ok(uploaded)
227    }
228
229    /// Get file tree (nested)
230    pub fn get_file_tree(&self) -> anyhow::Result<Vec<AllFileTree>> {
231        let conn = self.pool.get()?;
232
233        let mut stmt = conn.prepare(
234            "SELECT id, name, path, parent_id, is_directory, file_path, size_bytes, mime_type, created_at, modified_at, last_accessed, access_count 
235             FROM all_files WHERE parent_id IS NULL 
236             ORDER BY is_directory DESC, name ASC"
237        )?;
238
239        let root_files: Vec<AllFile> = stmt
240            .query_map([], |row| {
241                Ok(AllFile {
242                    id: row.get(0)?,
243                    name: row.get(1)?,
244                    path: row.get(2)?,
245                    parent_id: row.get(3)?,
246                    is_directory: row.get(4)?,
247                    file_path: row.get(5)?,
248                    size_bytes: row.get(6)?,
249                    mime_type: row.get(7)?,
250                    created_at: row
251                        .get::<_, String>(8)?
252                        .parse()
253                        .unwrap_or_else(|_| Utc::now()),
254                    modified_at: row
255                        .get::<_, String>(9)?
256                        .parse()
257                        .unwrap_or_else(|_| Utc::now()),
258                    last_accessed: row
259                        .get::<_, Option<String>>(10)?
260                        .and_then(|s| s.parse().ok()),
261                    access_count: row.get(11)?,
262                })
263            })?
264            .collect::<Result<Vec<_>, _>>()
265            .unwrap_or_default();
266
267        let tree: Vec<AllFileTree> = root_files
268            .into_iter()
269            .map(|f| self.build_tree(&f, &conn))
270            .collect();
271
272        Ok(tree)
273    }
274
275    fn build_tree(
276        &self,
277        file: &AllFile,
278        conn: &r2d2::PooledConnection<SqliteConnectionManager>,
279    ) -> AllFileTree {
280        let children = if file.is_directory {
281            let child_files: Vec<AllFile> = conn
282                .prepare(
283                    "SELECT id, name, path, parent_id, is_directory, file_path, size_bytes, mime_type, created_at, modified_at, last_accessed, access_count 
284                     FROM all_files WHERE parent_id = ?1 
285                     ORDER BY is_directory DESC, name ASC",
286                )
287                .and_then(|mut stmt| {
288                    stmt.query_map([file.id], |row| {
289                        Ok(AllFile {
290                            id: row.get(0)?,
291                            name: row.get(1)?,
292                            path: row.get(2)?,
293                            parent_id: row.get(3)?,
294                            is_directory: row.get(4)?,
295                            file_path: row.get(5)?,
296                            size_bytes: row.get(6)?,
297                            mime_type: row.get(7)?,
298                            created_at: row.get::<_, String>(8)?.parse().unwrap_or_else(|_| Utc::now()),
299                            modified_at: row.get::<_, String>(9)?.parse().unwrap_or_else(|_| Utc::now()),
300                            last_accessed: row.get::<_, Option<String>>(10)?.and_then(|s| s.parse().ok()),
301                            access_count: row.get(11)?,
302                        })
303                    })
304                    .and_then(|rows| rows.collect::<Result<Vec<_>, _>>())
305                })
306                .unwrap_or_default();
307
308            if child_files.is_empty() {
309                None
310            } else {
311                Some(
312                    child_files
313                        .into_iter()
314                        .map(|f| self.build_tree(&f, conn))
315                        .collect(),
316                )
317            }
318        } else {
319            None
320        };
321
322        AllFileTree {
323            file: file.clone(),
324            children,
325        }
326    }
327
328    /// Get all files (flat list)
329    pub fn get_all_files(&self) -> anyhow::Result<Vec<AllFile>> {
330        let conn = self.pool.get()?;
331
332        let mut stmt = conn.prepare(
333            "SELECT id, name, path, parent_id, is_directory, file_path, size_bytes, mime_type, created_at, modified_at, last_accessed, access_count 
334             FROM all_files WHERE is_directory = FALSE 
335             ORDER BY name ASC"
336        )?;
337
338        let files = stmt
339            .query_map([], |row| {
340                Ok(AllFile {
341                    id: row.get(0)?,
342                    name: row.get(1)?,
343                    path: row.get(2)?,
344                    parent_id: row.get(3)?,
345                    is_directory: row.get(4)?,
346                    file_path: row.get(5)?,
347                    size_bytes: row.get(6)?,
348                    mime_type: row.get(7)?,
349                    created_at: row
350                        .get::<_, String>(8)?
351                        .parse()
352                        .unwrap_or_else(|_| Utc::now()),
353                    modified_at: row
354                        .get::<_, String>(9)?
355                        .parse()
356                        .unwrap_or_else(|_| Utc::now()),
357                    last_accessed: row
358                        .get::<_, Option<String>>(10)?
359                        .and_then(|s| s.parse().ok()),
360                    access_count: row.get(11)?,
361                })
362            })?
363            .collect::<Result<Vec<_>, _>>()?;
364
365        Ok(files)
366    }
367
368    /// Get single file by ID
369    pub fn get_file(&self, id: i64) -> anyhow::Result<AllFile> {
370        let conn = self.pool.get()?;
371
372        conn.query_row(
373            "SELECT id, name, path, parent_id, is_directory, file_path, size_bytes, mime_type, created_at, modified_at, last_accessed, access_count 
374             FROM all_files WHERE id = ?1",
375            [id],
376            |row| {
377                Ok(AllFile {
378                    id: row.get(0)?,
379                    name: row.get(1)?,
380                    path: row.get(2)?,
381                    parent_id: row.get(3)?,
382                    is_directory: row.get(4)?,
383                    file_path: row.get(5)?,
384                    size_bytes: row.get(6)?,
385                    mime_type: row.get(7)?,
386                    created_at: row.get::<_, String>(8)?.parse().unwrap_or_else(|_| Utc::now()),
387                    modified_at: row.get::<_, String>(9)?.parse().unwrap_or_else(|_| Utc::now()),
388                    last_accessed: row.get::<_, Option<String>>(10)?.and_then(|s| s.parse().ok()),
389                    access_count: row.get(11)?,
390                })
391            },
392        ).map_err(|e| anyhow::anyhow!("File not found: {}", e))
393    }
394
395    /// Get file by path
396    pub fn get_file_by_path(&self, path: &str) -> anyhow::Result<AllFile> {
397        let conn = self.pool.get()?;
398
399        conn.query_row(
400            "SELECT id, name, path, parent_id, is_directory, file_path, size_bytes, mime_type, created_at, modified_at, last_accessed, access_count 
401             FROM all_files WHERE path = ?1",
402            [path],
403            |row| {
404                Ok(AllFile {
405                    id: row.get(0)?,
406                    name: row.get(1)?,
407                    path: row.get(2)?,
408                    parent_id: row.get(3)?,
409                    is_directory: row.get(4)?,
410                    file_path: row.get(5)?,
411                    size_bytes: row.get(6)?,
412                    mime_type: row.get(7)?,
413                    created_at: row.get::<_, String>(8)?.parse().unwrap_or_else(|_| Utc::now()),
414                    modified_at: row.get::<_, String>(9)?.parse().unwrap_or_else(|_| Utc::now()),
415                    last_accessed: row.get::<_, Option<String>>(10)?.and_then(|s| s.parse().ok()),
416                    access_count: row.get(11)?,
417                })
418            },
419        ).map_err(|e| anyhow::anyhow!("File not found: {}", e))
420    }
421
422    /// Get file content as raw bytes (for binary-aware extraction)
423    pub fn get_file_bytes(&self, id: i64) -> anyhow::Result<Vec<u8>> {
424        let file = self.get_file(id)?;
425
426        if file.is_directory {
427            return Err(anyhow::anyhow!("Cannot read content of directory"));
428        }
429
430        let file_path = file
431            .file_path
432            .ok_or_else(|| anyhow::anyhow!("File path not stored"))?;
433        let fs_path = self.all_files_dir.join(&file_path);
434
435        std::fs::read(&fs_path).map_err(|e| anyhow::anyhow!("Failed to read file: {}", e))
436    }
437
438    /// Get file content as string (text files only — binary files return lossy UTF-8)
439    pub fn get_file_content_string(&self, id: i64) -> anyhow::Result<String> {
440        let file = self.get_file(id)?;
441
442        if file.is_directory {
443            return Err(anyhow::anyhow!("Cannot read content of directory"));
444        }
445
446        let file_path = file
447            .file_path
448            .ok_or_else(|| anyhow::anyhow!("File path not stored"))?;
449        let fs_path = self.all_files_dir.join(&file_path);
450
451        std::fs::read_to_string(&fs_path)
452            .or_else(|_| {
453                std::fs::read(&fs_path).map(|bytes| String::from_utf8_lossy(&bytes).to_string())
454            })
455            .map_err(|e| anyhow::anyhow!("Failed to read file: {}", e))
456    }
457
458    /// Update access count
459    pub fn record_access(&self, id: i64) -> anyhow::Result<()> {
460        let conn = self.pool.get()?;
461        let now = chrono::Utc::now().to_rfc3339();
462
463        conn.execute(
464            "UPDATE all_files SET last_accessed = ?1, access_count = access_count + 1 WHERE id = ?2",
465            rusqlite::params![now, id],
466        )?;
467
468        Ok(())
469    }
470
471    /// Delete file or folder by ID
472    pub fn delete_file(&self, id: i64) -> anyhow::Result<()> {
473        let file = self.get_file(id)?;
474
475        if file.is_directory {
476            self.delete_recursive(id)?;
477        } else {
478            if let Some(file_path) = file.file_path {
479                let fs_path = self.all_files_dir.join(&file_path);
480                let _ = std::fs::remove_file(fs_path);
481            }
482        }
483
484        let conn = self.pool.get()?;
485        conn.execute("DELETE FROM all_files WHERE id = ?1", [id])?;
486
487        Ok(())
488    }
489
490    fn delete_recursive(&self, id: i64) -> anyhow::Result<()> {
491        let conn = self.pool.get()?;
492
493        let children: Vec<i64> = conn
494            .prepare("SELECT id FROM all_files WHERE parent_id = ?1")?
495            .query_map([id], |row| row.get(0))?
496            .collect::<Result<Vec<_>, _>>()?;
497
498        for child_id in children {
499            self.delete_file(child_id)?;
500        }
501
502        let file = self.get_file(id)?;
503        if let Some(file_path) = file.file_path {
504            let fs_path = self.all_files_dir.join(&file_path);
505            let _ = std::fs::remove_dir_all(fs_path);
506        }
507
508        Ok(())
509    }
510
511    /// Search files by name
512    pub fn search_files(&self, query: &str) -> anyhow::Result<Vec<AllFile>> {
513        let conn = self.pool.get()?;
514
515        let pattern = format!("%{}%", query);
516
517        let mut stmt = conn.prepare(
518            "SELECT id, name, path, parent_id, is_directory, file_path, size_bytes, mime_type, created_at, modified_at, last_accessed, access_count 
519             FROM all_files WHERE name LIKE ?1 AND is_directory = FALSE
520             ORDER BY name ASC"
521        )?;
522
523        let files = stmt
524            .query_map([pattern], |row| {
525                Ok(AllFile {
526                    id: row.get(0)?,
527                    name: row.get(1)?,
528                    path: row.get(2)?,
529                    parent_id: row.get(3)?,
530                    is_directory: row.get(4)?,
531                    file_path: row.get(5)?,
532                    size_bytes: row.get(6)?,
533                    mime_type: row.get(7)?,
534                    created_at: row
535                        .get::<_, String>(8)?
536                        .parse()
537                        .unwrap_or_else(|_| Utc::now()),
538                    modified_at: row
539                        .get::<_, String>(9)?
540                        .parse()
541                        .unwrap_or_else(|_| Utc::now()),
542                    last_accessed: row
543                        .get::<_, Option<String>>(10)?
544                        .and_then(|s| s.parse().ok()),
545                    access_count: row.get(11)?,
546                })
547            })?
548            .collect::<Result<Vec<_>, _>>()?;
549
550        Ok(files)
551    }
552}