kobo_db_tools/parser/
parser.rs

1use crate::{
2    get_bookmarks, Bookmark, Brightness, BrightnessEvent, BrightnessHistory, DictionaryWord,
3    NaturalLightHistory, ReadingSession, ReadingSessions,
4};
5use chrono::{DateTime, Utc};
6use rusqlite::Connection;
7use std::collections::HashMap;
8use std::path::Path;
9use std::str::FromStr;
10use thiserror::Error;
11use uuid::Uuid;
12
13#[derive(Debug, Error)]
14pub enum ParseError {
15    #[error("Event is not valid")]
16    InvalidEventType,
17    #[error("Error during session completation")]
18    SessionCompletionFailed,
19    #[error("Error during deserialize")]
20    DeserializationError,
21}
22
23#[derive(serde::Deserialize, Clone)]
24struct ReadingSessionAttributes {
25    progress: String,
26    volumeid: Option<String>,
27    title: Option<String>,
28}
29
30#[derive(serde::Deserialize)]
31struct LeaveContentMetrics {
32    #[serde(rename = "ButtonPressCount")]
33    button_press_count: usize,
34    #[serde(rename = "SecondsRead")]
35    seconds_read: usize,
36    #[serde(rename = "PagesTurned")]
37    pages_turned: usize,
38}
39
40#[derive(serde::Deserialize)]
41struct LightAttributes {
42    #[serde(rename = "Method")]
43    method: String,
44}
45
46#[derive(serde::Deserialize)]
47struct LightMetrics {
48    #[serde(alias = "NewNaturalLight")]
49    #[serde(alias = "NewBrightness")]
50    new_light: u8,
51}
52
53#[derive(serde::Deserialize)]
54struct DicitonaryAttributes {
55    #[serde(rename = "Dictionary")]
56    lang: String,
57    #[serde(rename = "Word")]
58    word: String,
59}
60
/// Selects which kinds of data a parse run should extract.
///
/// The enum is a plain set of unit variants, so it is `Copy`: callers can
/// pass it by value and keep using their own copy afterwards.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseOption {
    /// Extract everything: sessions, dictionary lookups, both light
    /// histories and bookmarks.
    All,
    /// Extract reading sessions only (`OpenContent` / `LeaveContent` events).
    ReadingSessions,
    /// Extract dictionary lookups only (`DictionaryLookup` events).
    DictionaryLookups,
    /// Extract the brightness history only (`BrightnessAdjusted` events).
    BrightnessHistory,
    /// Extract the natural-light history only (`NaturalLightAdjusted` events).
    NaturalLightHistory,
    /// Extract bookmarks only; no analytics events are queried.
    Bookmarks,
}
70
71#[derive(Debug, Default)]
72pub struct EventAnalysis {
73    pub sessions: Option<ReadingSessions>,
74    pub terms: Option<HashMap<DictionaryWord, usize>>,
75    pub brightness_history: Option<BrightnessHistory>,
76    pub natural_light_history: Option<NaturalLightHistory>,
77    pub bookmarks: Option<Vec<Bookmark>>,
78}
79
80pub struct Parser;
81
82impl Parser {
83    pub fn parse_events(db: &Connection, option: ParseOption) -> rusqlite::Result<EventAnalysis> {
84        let mut analysis = EventAnalysis::default();
85
86        let mut event_types_to_query = Vec::new();
87        let mut get_bookmarks_flag = false;
88
89        match option {
90            ParseOption::All => {
91                event_types_to_query.extend_from_slice(&[
92                    "'OpenContent'",
93                    "'LeaveContent'",
94                    "'DictionaryLookup'",
95                    "'BrightnessAdjusted'",
96                    "'NaturalLightAdjusted'",
97                ]);
98                get_bookmarks_flag = true;
99            }
100            ParseOption::ReadingSessions => {
101                event_types_to_query.extend_from_slice(&["'OpenContent'", "'LeaveContent'"]);
102            }
103            ParseOption::DictionaryLookups => {
104                event_types_to_query.push("'DictionaryLookup'");
105            }
106            ParseOption::BrightnessHistory => {
107                event_types_to_query.push("'BrightnessAdjusted'");
108            }
109            ParseOption::NaturalLightHistory => {
110                event_types_to_query.push("'NaturalLightAdjusted'");
111            }
112            ParseOption::Bookmarks => {
113                get_bookmarks_flag = true;
114            }
115        }
116
117        if get_bookmarks_flag {
118            analysis.bookmarks = Some(get_bookmarks(db)?);
119        }
120
121        if !event_types_to_query.is_empty() {
122            let q = format!(
123                "SELECT Id, Type, Timestamp, Attributes, Metrics FROM AnalyticsEvents WHERE Type IN ({}) ORDER BY Timestamp ASC;",
124                event_types_to_query.join(", ")
125            );
126
127            let mut stmt = db.prepare(&q)?;
128            let mut rows = stmt.query([])?;
129
130            let mut current_session: Option<ReadingSession> = None;
131            let mut sessions_vec = ReadingSessions::new();
132            let mut terms_map = HashMap::new();
133            let mut brightness_hist = BrightnessHistory::new();
134            let mut natural_light_hist = NaturalLightHistory::new();
135
136            while let Some(row) = rows.next()? {
137                let event_id: String = row.get("Id")?;
138                let event_type: String = row.get("Type")?;
139                let ts_str: String = row.get("Timestamp")?;
140                let ts = DateTime::<Utc>::from_str(&ts_str).map_err(|e| {
141                    rusqlite::Error::FromSqlConversionFailure(
142                        0,
143                        rusqlite::types::Type::Text,
144                        Box::new(e),
145                    )
146                })?;
147
148                match event_type.as_str() {
149                    "OpenContent" | "LeaveContent" => {
150                        if option == ParseOption::All || option == ParseOption::ReadingSessions {
151                            let attr_json: String = row.get("Attributes")?;
152                            let attr: ReadingSessionAttributes = serde_json::from_str(&attr_json)
153                                .map_err(|e| {
154                                rusqlite::Error::FromSqlConversionFailure(
155                                    1,
156                                    rusqlite::types::Type::Text,
157                                    Box::new(e),
158                                )
159                            })?;
160                            let progress = attr.progress.parse::<u8>().unwrap_or(0);
161
162                            let metrics = if event_type == "LeaveContent" {
163                                let metr_json: String = row.get("Metrics")?;
164                                Some(
165                                    serde_json::from_str::<LeaveContentMetrics>(&metr_json)
166                                        .map_err(|e| {
167                                            rusqlite::Error::FromSqlConversionFailure(
168                                                2,
169                                                rusqlite::types::Type::Text,
170                                                Box::new(e),
171                                            )
172                                        })?,
173                                )
174                            } else {
175                                None
176                            };
177
178                            match handle_reading_session_event(
179                                &event_type,
180                                &event_id,
181                                &mut current_session,
182                                ts,
183                                progress,
184                                &attr,
185                                metrics,
186                            ) {
187                                Ok(Some(session)) => sessions_vec.add_session(session),
188                                Ok(None) => {}
189                                Err(e) => eprintln!("Errore evento {}: {:?}", &event_id, e),
190                            }
191                        }
192                    }
193                    "DictionaryLookup" => {
194                        if option == ParseOption::All || option == ParseOption::DictionaryLookups {
195                            let session_id = current_session.as_ref().map(|s| s.id);
196                            let attr_json: String = row.get("Attributes")?;
197                            *terms_map
198                                .entry(on_dictionary_lookup(attr_json,session_id)?)
199                                .or_insert(0) += 1;
200                        }
201                    }
202                    "BrightnessAdjusted" => {
203                        if option == ParseOption::All || option == ParseOption::BrightnessHistory {
204                            let attr_json: String = row.get("Attributes")?;
205                            let metr_json: String = row.get("Metrics")?;
206                            let event = on_light_adjusted(attr_json, metr_json, ts)?;
207                            brightness_hist.insert(event);
208                        }
209                    }
210                    "NaturalLightAdjusted" => {
211                        if option == ParseOption::All || option == ParseOption::NaturalLightHistory
212                        {
213                            let attr_json: String = row.get("Attributes")?;
214                            let metr_json: String = row.get("Metrics")?;
215                            let event = on_light_adjusted(attr_json, metr_json, ts)?;
216                            natural_light_hist.insert(event);
217                        }
218                    }
219                    _ => {
220                        eprintln!("Unknown event: {}", event_type);
221                    }
222                }
223            }
224            if option == ParseOption::All || option == ParseOption::ReadingSessions {
225                analysis.sessions = Some(sessions_vec);
226            }
227            if option == ParseOption::All || option == ParseOption::DictionaryLookups {
228                analysis.terms = Some(terms_map);
229            }
230            if option == ParseOption::All || option == ParseOption::BrightnessHistory {
231                analysis.brightness_history = Some(brightness_hist);
232            }
233            if option == ParseOption::All || option == ParseOption::NaturalLightHistory {
234                analysis.natural_light_history = Some(natural_light_hist);
235            }
236        }
237        Ok(analysis)
238    }
239    pub fn parse_from_str<P: AsRef<Path>>(
240        path: P,
241        option: ParseOption,
242    ) -> rusqlite::Result<EventAnalysis> {
243        let conn = Connection::open(path)?;
244        Self::parse_events(&conn, option)
245    }
246}
247
248fn handle_reading_session_event(
249    event_type: &str,
250    event_id: &str,
251    current_session: &mut Option<ReadingSession>,
252    ts: DateTime<Utc>,
253    progress: u8,
254    attr: &ReadingSessionAttributes,
255    metrics: Option<LeaveContentMetrics>,
256) -> Result<Option<ReadingSession>, ParseError> {
257    match event_type {
258        "OpenContent" => {
259            *current_session = Some(ReadingSession::new(
260                ts,
261                progress,
262                attr.title.clone(),
263                attr.volumeid.clone(),
264                event_id.to_string(),
265            ));
266            Ok(None)
267        }
268        "LeaveContent" => {
269            if let Some(ref mut session) = current_session {
270                let _open_content_id = session.open_content_id.clone();
271                let m = metrics.ok_or(ParseError::SessionCompletionFailed)?;
272                session
273                    .complete_session(
274                        ts,
275                        progress,
276                        m.button_press_count as u64,
277                        m.seconds_read as u64,
278                        m.pages_turned as u64,
279                        event_id.to_string(),
280                    )
281                    .map_err(|_| ParseError::SessionCompletionFailed)?;
282
283                let completed = std::mem::take(session);
284                *current_session = None;
285                Ok(Some(completed))
286            } else {
287                Err(ParseError::SessionCompletionFailed)
288            }
289        }
290        _ => Err(ParseError::InvalidEventType),
291    }
292}
293
294fn on_dictionary_lookup(attr_json: String,session_id:Option<Uuid>) -> rusqlite::Result<DictionaryWord> {
295    let attr: DicitonaryAttributes = serde_json::from_str(&attr_json).map_err(|e| {
296        rusqlite::Error::FromSqlConversionFailure(1, rusqlite::types::Type::Text, Box::new(e))
297    })?;
298    Ok(DictionaryWord::new(attr.word, attr.lang,session_id))
299}
300
301fn on_light_adjusted(
302    attr_json: String,
303    metr_json: String,
304    ts: DateTime<Utc>,
305) -> rusqlite::Result<BrightnessEvent> {
306    let attributes: LightAttributes = serde_json::from_str(&attr_json).map_err(|e| {
307        rusqlite::Error::FromSqlConversionFailure(1, rusqlite::types::Type::Text, Box::new(e))
308    })?;
309    let metrics: LightMetrics = serde_json::from_str(&metr_json).map_err(|e| {
310        rusqlite::Error::FromSqlConversionFailure(1, rusqlite::types::Type::Text, Box::new(e))
311    })?;
312    let brightness = Brightness::new(attributes.method, metrics.new_light);
313    Ok(BrightnessEvent::new(brightness, ts))
314}
315
#[cfg(test)]
mod tests {
    use super::{ParseOption, Parser};
    use rusqlite::Connection;

    /// Statement shared by every analytics-event fixture.
    const INSERT_EVENT_SQL: &str = "INSERT INTO AnalyticsEvents (Id, Type, Timestamp, Attributes, Metrics) VALUES (?, ?, ?, ?, ?)";

    /// Creates an in-memory database with the three tables the parser reads.
    fn setup_test_db() -> Connection {
        let conn = Connection::open_in_memory().unwrap();
        conn.execute_batch(
            "CREATE TABLE AnalyticsEvents (\n                Id TEXT PRIMARY KEY,\n                Type TEXT NOT NULL,\n                Timestamp TEXT NOT NULL,\n                Attributes TEXT,\n                Metrics TEXT\n            );\n            CREATE TABLE content (\n                ContentID TEXT PRIMARY KEY,\n                Title TEXT\n            );\n            CREATE TABLE Bookmark (\n                BookmarkID TEXT PRIMARY KEY,\n                Text TEXT,\n                VolumeID TEXT,\n                Color INTEGER,\n                ChapterProgress REAL,\n                DateCreated TEXT,\n                DateModified TEXT\n            );",
        )
        .unwrap();
        conn
    }

    /// Inserts a single row into `AnalyticsEvents`.
    fn insert_event(
        db: &Connection,
        id: &str,
        event_type: &str,
        timestamp: &str,
        attributes: &str,
        metrics: &str,
    ) {
        db.execute(
            INSERT_EVENT_SQL,
            &[id, event_type, timestamp, attributes, metrics],
        )
        .unwrap();
    }

    /// Inserts one matching `OpenContent` / `LeaveContent` pair for "Book One".
    fn insert_reading_session(db: &Connection) {
        insert_event(
            db,
            "session1_open",
            "OpenContent",
            "2023-01-01T10:00:00Z",
            r#"{"progress":"0","volumeid":"book1","title":"Book One"}"#,
            "",
        );
        insert_event(
            db,
            "session1_leave",
            "LeaveContent",
            "2023-01-01T10:05:00Z",
            r#"{"progress":"10","volumeid":"book1","title":"Book One"}"#,
            r#"{"ButtonPressCount":10,"SecondsRead":300,"PagesTurned":5}"#,
        );
    }

    /// Inserts the "Book One" content row and one bookmark that refers to it.
    fn insert_bookmark(db: &Connection) {
        db.execute(
            "INSERT INTO content (ContentID, Title) VALUES (?, ?)",
            &["book1", "Book One"],
        )
        .unwrap();
        db.execute(
            "INSERT INTO Bookmark (BookmarkID, Text, VolumeID, Color, ChapterProgress, DateCreated, DateModified) VALUES (?, ?, ?, ?, ?, ?, ?)",
            &["bookmark1", "Some text", "book1", "1", "0.5", "2023-01-01T10:06:00Z", "2023-01-01T10:06:00Z"],
        )
        .unwrap();
    }

    #[test]
    fn test_parse_events_all() {
        let db = setup_test_db();

        // One fixture per category the `All` option is expected to report.
        insert_reading_session(&db);
        insert_event(
            &db,
            "dict_lookup1",
            "DictionaryLookup",
            "2023-01-01T10:01:00Z",
            r#"{"Dictionary":"en","Word":"test"}"#,
            "",
        );
        insert_event(
            &db,
            "brightness_adj1",
            "BrightnessAdjusted",
            "2023-01-01T10:02:00Z",
            r#"{"Method":"manual"}"#,
            r#"{"NewBrightness":50}"#,
        );
        insert_event(
            &db,
            "natural_light_adj1",
            "NaturalLightAdjusted",
            "2023-01-01T10:03:00Z",
            r#"{"Method":"auto"}"#,
            r#"{"NewNaturalLight":70}"#,
        );
        insert_bookmark(&db);

        let analysis = Parser::parse_events(&db, ParseOption::All).unwrap();

        let sessions = analysis.sessions.expect("sessions should be present");
        assert_eq!(sessions.sessions_count(), 1);

        let terms = analysis.terms.expect("terms should be present");
        assert_eq!(terms.len(), 1);

        let brightness = analysis
            .brightness_history
            .expect("brightness history should be present");
        assert_eq!(brightness.events.len(), 1);

        let natural_light = analysis
            .natural_light_history
            .expect("natural light history should be present");
        assert_eq!(natural_light.events.len(), 1);

        let bookmarks = analysis.bookmarks.expect("bookmarks should be present");
        assert_eq!(bookmarks.len(), 1);
    }

    #[test]
    fn test_parse_events_reading_sessions() {
        let db = setup_test_db();
        insert_reading_session(&db);

        let analysis = Parser::parse_events(&db, ParseOption::ReadingSessions).unwrap();

        // Only the requested category is populated.
        assert!(analysis.terms.is_none());
        assert!(analysis.brightness_history.is_none());
        assert!(analysis.natural_light_history.is_none());
        assert!(analysis.bookmarks.is_none());

        let sessions = analysis.sessions.expect("sessions should be present");
        assert_eq!(sessions.sessions_count(), 1);
    }

    #[test]
    fn test_parse_events_bookmarks() {
        let db = setup_test_db();
        insert_bookmark(&db);

        let analysis = Parser::parse_events(&db, ParseOption::Bookmarks).unwrap();

        // No analytics category is populated for a bookmark-only run.
        assert!(analysis.sessions.is_none());
        assert!(analysis.terms.is_none());
        assert!(analysis.brightness_history.is_none());
        assert!(analysis.natural_light_history.is_none());

        let bookmarks = analysis.bookmarks.expect("bookmarks should be present");
        assert_eq!(bookmarks.len(), 1);
    }
}