gutenberg_rs/
sqlite_cache.rs

1use crate::error::Error;
2use crate::fst_parser::DictionaryItemContent;
3use crate::fst_parser::ParseResult;
4use crate::fst_parser_type::ParseType;
5use crate::settings::GutenbergCacheSettings;
6use indexmap::IndexMap;
7use indicatif::{ProgressBar, ProgressStyle};
8use num_traits::FromPrimitive;
9use rusqlite::Connection;
10use serde_json::Value;
11use std::fs;
12use std::path::Path;
13
14pub struct SQLiteCache {
15    pub connection: Box<Connection>,
16}
17struct HelperQuery<'a> {
18    tables: Vec<&'a str>,
19    query_struct: Vec<&'a str>,
20}
21
22impl SQLiteCache {
23    pub fn get_download_links(&mut self, ids: Vec<i32>) -> Result<Vec<String>, Error> {
24        let ids_collect = ids.iter().map(|x| x.to_string()).collect::<Vec<String>>();
25        let ids_str = ids_collect.join(",");
26        let q = format!("SELECT downloadlinks.name FROM downloadlinks, books WHERE downloadlinks.bookid = books.id AND books.gutenbergbookid IN ({}) AND downloadlinks.downloadtypeid in (5, 6, 10,13,26,27,28,33,34,35,40,46,49,51)", ids_str);
27        let mut stmt = self.connection.prepare(&q)?;
28        let mut rows = stmt.query(())?;
29        let mut results = Vec::new();
30        while let Some(row) = rows.next()? {
31            results.push(row.get(0)?);
32        }
33        Ok(results)
34    }
35
36    pub fn query(&mut self, json: &Value) -> Result<Vec<i32>, Error> {
37        let mut helpers = Vec::new();
38
39        if let Some(field) = json.get("language") {
40            if let Some(field_value) = field.as_str() {
41                helpers.push(HelperQuery {
42                    tables: vec!["languages", "book_languages"],
43                    query_struct: vec![
44                        "languages.id = book_languages.languageid AND books.id = book_languages.bookid",
45                        "languages.name",
46                        field_value,
47                    ],
48                });
49            } else {
50                return Err(Error::InvalidQuery("language must be a string".to_string()));
51            }
52        }
53        if let Some(field) = json.get("author") {
54            if let Some(field_value) = field.as_str() {
55                helpers.push(HelperQuery {
56                    tables: vec!["authors", "book_authors"],
57                    query_struct: vec![
58                        "authors.id = book_authors.authorid and books.id = book_authors.bookid",
59                        "authors.name",
60                        field_value,
61                    ],
62                });
63            } else {
64                return Err(Error::InvalidQuery("author must be a string".to_string()));
65            }
66        }
67        if let Some(field) = json.get("title") {
68            if let Some(field_value) = field.as_str() {
69                helpers.push(HelperQuery {
70                    tables: vec!["titles"],
71                    query_struct: vec!["titles.bookid = books.id", "titles.name", field_value],
72                });
73            } else {
74                return Err(Error::InvalidQuery("title must a string".to_string()));
75            }
76        }
77        if let Some(field) = json.get("subject") {
78            if let Some(field_value) = field.as_str() {
79                helpers.push(HelperQuery {
80                    tables: vec!["subjects", "book_subjects"],
81                    query_struct: vec![
82                        "subjects.id = book_subjects.bookid and books.id = book_subjects.subjectid ",
83                        "subjects.name",
84                        field_value,
85                    ],
86                });
87            } else {
88                return Err(Error::InvalidQuery("subject must a string".to_string()));
89            }
90        }
91        if let Some(field) = json.get("publisher") {
92            if let Some(field_value) = field.as_str() {
93                helpers.push(HelperQuery {
94                    tables: vec!["publishers"],
95                    query_struct: vec![
96                        "publishers.id = books.publisherid",
97                        "publishers.name",
98                        field_value,
99                    ],
100                });
101            } else {
102                return Err(Error::InvalidQuery("publisher must a string".to_string()));
103            }
104        }
105        if let Some(field) = json.get("bookshelve") {
106            if let Some(field_value) = field.as_str() {
107                helpers.push(HelperQuery {
108                    tables: vec!["bookshelves, book_bookshelves"],
109                    query_struct: vec![
110                        "bookshelves.id = book_bookshelves.bookshelfid AND books.id = book_bookshelves.bookid",
111                        "bookshelves.name",
112                        field_value,
113                    ],
114                });
115            } else {
116                return Err(Error::InvalidQuery("bookshelve must a string".to_string()));
117            }
118        }
119        if let Some(field) = json.get("rights") {
120            if let Some(field_value) = field.as_str() {
121                helpers.push(HelperQuery {
122                    tables: vec!["rights"],
123                    query_struct: vec!["rights.id = books.rightsid", "rights.name", field_value],
124                });
125            } else {
126                return Err(Error::InvalidQuery("rights must be a string".to_string()));
127            }
128        }
129        if let Some(field) = json.get("downloadlinkstype") {
130            if let Some(field_value) = field.as_str() {
131                helpers.push(HelperQuery{tables: vec!["downloadlinks", "downloadlinkstype"],
132                            query_struct: vec!["downloadlinks.downloadtypeid =  downloadlinkstype.id and downloadlinks.bookid = books.id",
133                            "downloadlinkstype.name",
134                            field_value]});
135            } else {
136                return Err(Error::InvalidQuery(
137                    "downloadlinkstype must a string".to_string(),
138                ));
139            }
140        }
141
142        let mut query = "SELECT DISTINCT books.gutenbergbookid FROM books".to_string();
143        for q in &helpers {
144            query = format!("{},{}", query, q.tables.join(","))
145        }
146
147        query = format!("{} WHERE ", query);
148        let mut idx = 0;
149        for q in &helpers {
150            query = format!(
151                "{} {} and {} in ({}) ",
152                query, q.query_struct[0], q.query_struct[1], q.query_struct[2]
153            );
154            if idx != helpers.len() - 1 {
155                query = format!("{} and ", query);
156            }
157            idx = idx + 1;
158        }
159        let mut stmt = self.connection.prepare(&query)?;
160        let mut rows = stmt.query(())?;
161        let mut results = Vec::new();
162        while let Some(row) = rows.next()? {
163            results.push(row.get(0)?);
164        }
165
166        Ok(results)
167    }
168}
169
170impl SQLiteCache {
171    pub fn get_cache(settings: &GutenbergCacheSettings) -> Result<SQLiteCache, Error> {
172        if Path::new(&settings.cache_filename).exists() {
173            let connection = Box::new(Connection::open(&settings.cache_filename)?);
174            return Ok(SQLiteCache { connection });
175        }
176        Err(Error::InvalidIO(
177            format!("No cache file {}", settings.cache_filename).to_string(),
178        ))
179    }
180
181    pub fn create_cache(
182        parse_results: &ParseResult,
183        settings: &GutenbergCacheSettings,
184        force_recreate: bool,
185        show_progress_bar: bool,
186    ) -> Result<SQLiteCache, Error> {
187        if Path::new(&settings.cache_filename).exists() && !settings.db_in_memory {
188            if force_recreate {
189                fs::remove_file(&settings.cache_filename)?;
190            } else {
191                let connection = Box::new(Connection::open(&settings.cache_filename)?);
192                return Ok(SQLiteCache { connection });
193            }
194        }
195        let mut connection = match settings.db_in_memory {
196            false => Box::new(Connection::open(&settings.cache_filename)?),
197            true => Box::new(Connection::open(":memory:")?),
198        };
199        let create_query = include_str!("gutenbergindex.db.sql");
200        connection.execute_batch(create_query)?;
201        connection.execute_batch("PRAGMA journal_mode = OFF;PRAGMA synchronous = 0;PRAGMA cache_size = 1000000;PRAGMA locking_mode = EXCLUSIVE;PRAGMA temp_store = MEMORY;")?;
202
203        let mut book_id = 0;
204
205        let mut pb_fields: Option<ProgressBar> = None;
206        if show_progress_bar {
207            let pb = ProgressBar::new(parse_results.field_dictionaries.len() as u64);
208            pb.set_style(
209                ProgressStyle::with_template(
210                    "{msg}\n{spinner:.green} [{elapsed_precise}] [{wide_bar:.white/blue}] ({eta})",
211                )?
212                .progress_chars("█  "),
213            );
214            pb_fields = Some(pb);
215        }
216
217        for (idx, result) in parse_results.field_dictionaries.iter().enumerate() {
218            book_id = book_id + 1;
219            if let Some(pb) = &mut pb_fields {
220                pb.set_position((idx + 1) as u64);
221            }
222
223            match FromPrimitive::from_usize(idx) {
224                Some(ParseType::Title) => {
225                    SQLiteCache::insert_many_field_id(
226                        &mut connection,
227                        "titles",
228                        "name",
229                        "bookid",
230                        result,
231                        book_id,
232                    )?;
233                }
234                Some(ParseType::Subject) => {
235                    SQLiteCache::insert_many_fields(&mut connection, "subjects", "name", result)?;
236                }
237                Some(ParseType::Language) => {
238                    SQLiteCache::insert_many_fields(&mut connection, "languages", "name", result)?;
239                }
240                Some(ParseType::Author) => {
241                    SQLiteCache::insert_many_fields(&mut connection, "authors", "name", result)?;
242                }
243                Some(ParseType::Bookshelf) => {
244                    SQLiteCache::insert_many_fields(
245                        &mut connection,
246                        "bookshelves",
247                        "name",
248                        result,
249                    )?;
250                }
251                Some(ParseType::Publisher) => {
252                    SQLiteCache::insert_many_fields(&mut connection, "publishers", "name", result)?;
253                }
254                Some(ParseType::Rights) => {
255                    SQLiteCache::insert_many_fields(&mut connection, "rights", "name", result)?;
256                }
257                _ => {}
258            }
259        }
260        if let Some(pb) = pb_fields {
261            pb.finish();
262        }
263        SQLiteCache::insert_many_fields(
264            &mut connection,
265            "downloadlinkstype",
266            "name",
267            &parse_results.file_types_dictionary,
268        )?;
269        let mut pb_all: Option<ProgressBar> = None;
270        if show_progress_bar {
271            let pb = ProgressBar::new(parse_results.books.len() as u64);
272            pb.set_style(
273                ProgressStyle::with_template(
274                    "{msg}\n{spinner:.green} [{elapsed_precise}] [{wide_bar:.white/blue}] ({eta})",
275                )?
276                .progress_chars("█  "),
277            );
278
279            pb.set_message(format!("Building sqlite db"));
280            pb_all = Some(pb);
281        }
282
283        for (idx, book) in parse_results.books.iter().enumerate() {
284            if let Some(pb) = &mut pb_all {
285                pb.set_position(idx as u64);
286            }
287            let pairs_book_authors = book
288                .author_ids
289                .iter()
290                .map(|x| (*x + 1, idx + 1))
291                .collect::<Vec<(usize, usize)>>();
292            let pairs_book_subjects = book
293                .subject_ids
294                .iter()
295                .map(|x| (*x + 1, idx + 1))
296                .collect::<Vec<(usize, usize)>>();
297            let pairs_book_languages = book
298                .language_ids
299                .iter()
300                .map(|x| (*x + 1, idx + 1))
301                .collect::<Vec<(usize, usize)>>();
302            let pairs_book_bookshelves = book
303                .bookshelf_ids
304                .iter()
305                .map(|x| (*x + 1, idx + 1))
306                .collect::<Vec<(usize, usize)>>();
307
308            SQLiteCache::insert_links(
309                &mut connection,
310                pairs_book_authors,
311                "book_authors",
312                "authorid",
313                "bookid",
314            )?;
315            SQLiteCache::insert_links(
316                &mut connection,
317                pairs_book_subjects,
318                "book_subjects",
319                "subjectid",
320                "bookid",
321            )?;
322            SQLiteCache::insert_links(
323                &mut connection,
324                pairs_book_languages,
325                "book_languages",
326                "languageid",
327                "bookid",
328            )?;
329            SQLiteCache::insert_links(
330                &mut connection,
331                pairs_book_bookshelves,
332                "book_bookshelves",
333                "bookshelfid",
334                "bookid",
335            )?;
336
337            let query = format!(
338                "INSERT OR IGNORE INTO downloadlinks(name, downloadtypeid, bookid) VALUES (?,?,?)"
339            );
340
341            let mut smt = connection.prepare(query.as_str())?;
342            for item in book.files.iter() {
343                let mut file_link = "";
344                if let Some(file_link_item) = parse_results
345                    .files_dictionary
346                    .get_index(item.file_link_id as usize)
347                {
348                    file_link = file_link_item.0;
349                }
350                smt.execute([
351                    file_link,
352                    item.file_type_id.to_string().as_str(),
353                    (idx + 1).to_string().as_str(),
354                ])?;
355            }
356
357            connection.execute("INSERT OR IGNORE INTO books(publisherid,rightsid,numdownloads,gutenbergbookid) VALUES (?,?,?,?)"
358            , (book.publisher_id, book.rights_id,
359            book.num_downloads,book.gutenberg_book_id))?;
360        }
361        let create_query = include_str!("gutenbergindex_indices.db.sql");
362        connection.execute_batch(create_query)?;
363
364        if let Some(pb) = pb_all {
365            pb.finish();
366        }
367
368        Ok(SQLiteCache { connection })
369    }
370
371    fn insert_links(
372        connection: &mut Connection,
373        links: Vec<(usize, usize)>,
374        table_name: &str,
375        link1_name: &str,
376        link2_name: &str,
377    ) -> Result<(), Error> {
378        if links.is_empty() {
379            return Ok(());
380        }
381        let query = std::format!(
382            "INSERT INTO {}({},{}) VALUES (?,?)",
383            table_name,
384            link1_name,
385            link2_name
386        );
387
388        let mut smt = connection.prepare(query.as_str())?;
389        for item in links.iter() {
390            smt.execute([item.0.to_string(), item.1.to_string()])?;
391        }
392        Ok(())
393    }
394    fn insert_many_fields(
395        connection: &mut Connection,
396        table: &str,
397        field: &str,
398        field_dictionary: &IndexMap<String, DictionaryItemContent>,
399    ) -> Result<(), Error> {
400        if field_dictionary.is_empty() {
401            return Ok(());
402        }
403
404        let query = std::format!("INSERT OR IGNORE INTO {}({}) VALUES(?)", table, field);
405
406        let mut smt = connection.prepare(query.as_str())?;
407        for item in field_dictionary.iter() {
408            smt.execute([item.0.as_str()])?;
409        }
410        Ok(())
411    }
412
413    fn insert_many_field_id(
414        connection: &mut Connection,
415        table: &str,
416        field1: &str,
417        field2: &str,
418        field_dictionary: &IndexMap<String, DictionaryItemContent>,
419        _book_id: usize,
420    ) -> Result<(), Error> {
421        if field_dictionary.is_empty() {
422            return Ok(());
423        }
424
425        let query = format!(
426            "INSERT OR IGNORE INTO {}({}, {}) VALUES (?,?)",
427            table, field1, field2
428        );
429
430        let mut smt = connection.prepare(query.as_str())?;
431        for item in field_dictionary.iter() {
432            for book_id in &item.1.book_links {
433                smt.execute([item.0.as_str(), book_id.to_string().as_str()])?;
434            }
435        }
436        Ok(())
437    }
438}