1use crate::{
2 library::{epub_guard, epub_meta, hashing, normalise, reanchor},
3 store::{
4 books::{resolve_identity, upsert, BookRow, IdentityMatch},
5 db::Db,
6 },
7};
8use std::path::Path;
9
/// Outcome counters and per-file failures produced by a folder scan.
///
/// `Default` yields all counters at zero and an empty error list.
#[derive(Debug, Default)]
pub struct ScanReport {
    /// Count of books recorded as newly inserted.
    pub inserted: usize,
    /// Count of books recorded as updated.
    pub updated: usize,
    /// Count of files recorded as skipped.
    pub skipped: usize,
    /// Files that could not be processed, each paired with its error message.
    pub errors: Vec<(std::path::PathBuf, String)>,
}
17
18pub fn scan_folder(dir: &Path, db: &Db) -> anyhow::Result<ScanReport> {
19 let mut report = ScanReport::default();
20 let mut conn = db.conn()?;
21 for entry in walkdir(dir) {
22 let path = entry;
23 if path.extension().and_then(|s| s.to_str()) != Some("epub") {
24 continue;
25 }
26
27 if let Err(e) = epub_guard::validate_archive(&path, epub_guard::Limits::default()) {
28 let err_string = e.to_string();
29 record_broken(&mut conn, &path, &err_string);
30 report.errors.push((path.clone(), err_string));
31 continue;
32 }
33
34 let meta = match epub_meta::extract(&path) {
35 Ok(m) => m,
36 Err(e) => {
37 let err_string = e.to_string();
38 record_broken(&mut conn, &path, &err_string);
39 report.errors.push((path.clone(), err_string));
40 continue;
41 }
42 };
43
44 let file_hash = hashing::sha256_file(&path).ok();
45 let row = BookRow {
46 stable_id: meta.stable_id.clone(),
47 file_hash,
48 title_norm: normalise::normalise_text(&meta.title),
49 author_norm: meta.author.as_deref().map(normalise::normalise_author),
50 path: path.to_string_lossy().to_string(),
51 title: meta.title,
52 author: meta.author,
53 language: meta.language,
54 publisher: meta.publisher,
55 published_at: meta.published_at,
56 word_count: meta.word_count,
57 page_count: meta.word_count.map(|w| (w / 275).max(1)),
58 parse_error: None,
59 };
60 let pre_hash: Option<String> = match resolve_identity(&conn, &row)? {
63 Some(
64 IdentityMatch::ById(id) | IdentityMatch::ByHash(id) | IdentityMatch::ByNorm(id),
65 ) => conn
66 .query_row(
67 "SELECT file_hash FROM books WHERE id = ?",
68 rusqlite::params![id],
69 |r| r.get::<_, Option<String>>(0),
70 )
71 .ok()
72 .flatten(),
73 None => None,
74 };
75
76 let book_id = upsert(&mut conn, &row)?;
78
79 if let (Some(pre), Some(post)) = (pre_hash, row.file_hash.as_ref()) {
82 if pre != *post {
83 let _ = reanchor::reanchor_book(db, book_id, &path);
84 }
85 }
86
87 report.inserted += 1;
88 }
89
90 let dir_prefix = dir.to_string_lossy().to_string();
92 let orphaned: Vec<(i64, String)> = conn
93 .prepare("SELECT id, path FROM books WHERE deleted_at IS NULL AND path LIKE ? || '%'")?
94 .query_map(rusqlite::params![dir_prefix], |r| {
95 Ok((r.get(0)?, r.get(1)?))
96 })?
97 .collect::<Result<_, _>>()?;
98
99 for (id, p) in orphaned {
100 if !std::path::Path::new(&p).exists() {
101 conn.execute(
102 "UPDATE books SET deleted_at = CURRENT_TIMESTAMP WHERE id = ?",
103 rusqlite::params![id],
104 )?;
105 }
106 }
107
108 Ok(report)
109}
110
111fn record_broken(conn: &mut rusqlite::Connection, path: &Path, err_string: &str) {
115 let title = path
116 .file_name()
117 .and_then(|s| s.to_str())
118 .unwrap_or("unknown.epub")
119 .to_string();
120 let row = BookRow {
121 stable_id: None,
122 file_hash: hashing::sha256_file(path).ok(),
123 title_norm: normalise::normalise_text(&title),
124 author_norm: None,
125 path: path.to_string_lossy().to_string(),
126 title,
127 author: None,
128 language: None,
129 publisher: None,
130 published_at: None,
131 word_count: None,
132 page_count: None,
133 parse_error: Some(err_string.to_string()),
134 };
135 let _ = upsert(conn, &row);
136}
137
/// Recursively collects the paths of all non-directory entries beneath `dir`.
///
/// Directories that cannot be read and entries that fail to enumerate are
/// skipped silently, so a missing or unreadable `dir` yields an empty list.
/// Entries appear in the order `read_dir` reports them, depth-first.
///
/// Symlinked directories are followed, but a directory that resolves to one of
/// the directories currently being descended is not entered again. This keeps a
/// symlink cycle from being walked repeatedly and flooding the result with
/// duplicate paths.
fn walkdir(dir: &Path) -> Vec<std::path::PathBuf> {
    /// Appends the files under `dir` to `out`; `ancestors` holds the canonical
    /// paths of the directories on the current descent.
    fn descend(
        dir: &Path,
        ancestors: &mut Vec<std::path::PathBuf>,
        out: &mut Vec<std::path::PathBuf>,
    ) {
        // Fall back to the literal path when it cannot be canonicalised, so the
        // directory is still attempted exactly as before.
        let canonical = std::fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf());
        if ancestors.contains(&canonical) {
            return;
        }
        let entries = match std::fs::read_dir(dir) {
            Ok(entries) => entries,
            Err(_) => return,
        };

        ancestors.push(canonical);
        for entry in entries.flatten() {
            let path = entry.path();
            // `is_dir` follows symlinks, hence the ancestor check above.
            if path.is_dir() {
                descend(&path, ancestors, out);
            } else {
                out.push(path);
            }
        }
        ancestors.pop();
    }

    let mut out = Vec::new();
    let mut ancestors = Vec::new();
    descend(dir, &mut ancestors, &mut out);
    out
}