ankit_engine/
analyze.rs

1//! Study statistics and problem card detection.
2//!
3//! This module provides analytics workflows for understanding study
4//! patterns and identifying cards that need attention.
5
6use std::collections::HashMap;
7
8use crate::Result;
9use ankit::AnkiClient;
10use serde::Serialize;
11
12/// Summary of study activity.
13#[derive(Debug, Clone, Default, Serialize)]
14pub struct StudySummary {
15    /// Total number of reviews in the period.
16    pub total_reviews: usize,
17    /// Number of unique cards reviewed.
18    pub unique_cards: usize,
19    /// Total time spent studying in seconds.
20    pub total_time_seconds: u64,
21    /// Average reviews per day.
22    pub avg_reviews_per_day: f64,
23    /// Daily breakdown.
24    pub daily: Vec<DailyStats>,
25}
26
27/// Study statistics for a single day.
28#[derive(Debug, Clone, Default, Serialize)]
29pub struct DailyStats {
30    /// Date in YYYY-MM-DD format.
31    pub date: String,
32    /// Number of reviews.
33    pub reviews: usize,
34    /// Time spent in seconds.
35    pub time_seconds: u64,
36}
37
38/// A card identified as problematic.
39#[derive(Debug, Clone, Serialize)]
40pub struct ProblemCard {
41    /// The card ID.
42    pub card_id: i64,
43    /// The note ID.
44    pub note_id: i64,
45    /// Number of lapses (times forgotten).
46    pub lapses: i64,
47    /// Total number of reviews.
48    pub reps: i64,
49    /// Current ease factor (percentage * 10).
50    pub ease: i64,
51    /// Current interval in days.
52    pub interval: i64,
53    /// The deck name.
54    pub deck_name: String,
55    /// Front field content (first field).
56    pub front: String,
57    /// Reason this card was flagged.
58    pub reason: ProblemReason,
59}
60
61/// Reason a card was flagged as problematic.
62#[derive(Debug, Clone, Serialize)]
63pub enum ProblemReason {
64    /// Card has been forgotten many times.
65    HighLapseCount(i64),
66    /// Card has very low ease factor.
67    LowEase(i64),
68    /// Card has been reviewed many times but still has short interval.
69    PoorRetention { reps: i64, interval: i64 },
70}
71
/// Thresholds that decide which cards count as problem cards.
#[derive(Debug, Clone)]
pub struct ProblemCriteria {
    /// A card with at least this many lapses is flagged.
    pub min_lapses: i64,
    /// A card whose ease factor is at or below this is flagged (e.g., 2000 = 200%).
    pub max_ease: i64,
    /// Poor retention: the card must have at least this many reps...
    pub min_reps_for_retention: i64,
    /// ...while its interval is still at most this many days.
    pub max_interval_for_retention: i64,
}

impl Default for ProblemCriteria {
    /// Defaults: 5+ lapses, ease of 200% or lower, or 10+ reps with an
    /// interval of 7 days or less.
    fn default() -> Self {
        ProblemCriteria {
            max_interval_for_retention: 7,
            min_reps_for_retention: 10,
            max_ease: 2000, // 200%
            min_lapses: 5,
        }
    }
}
95
96/// Analysis workflow engine.
97#[derive(Debug)]
98pub struct AnalyzeEngine<'a> {
99    client: &'a AnkiClient,
100}
101
102impl<'a> AnalyzeEngine<'a> {
103    pub(crate) fn new(client: &'a AnkiClient) -> Self {
104        Self { client }
105    }
106
107    /// Get a summary of study activity.
108    ///
109    /// # Arguments
110    ///
111    /// * `deck` - Deck to analyze (use "*" for all decks)
112    /// * `days` - Number of days to include
113    ///
114    /// # Example
115    ///
116    /// ```no_run
117    /// # use ankit_engine::Engine;
118    /// # async fn example() -> ankit_engine::Result<()> {
119    /// let engine = Engine::new();
120    /// let stats = engine.analyze().study_summary("Japanese", 30).await?;
121    /// println!("Reviewed {} cards", stats.total_reviews);
122    /// # Ok(())
123    /// # }
124    /// ```
125    pub async fn study_summary(&self, deck: &str, days: u32) -> Result<StudySummary> {
126        let daily_reviews = self.client.statistics().cards_reviewed_by_day().await?;
127
128        let mut summary = StudySummary::default();
129        let take_days = days as usize;
130
131        // Take last N days
132        let recent: Vec<_> = daily_reviews.into_iter().take(take_days).collect();
133
134        for (date, count) in &recent {
135            summary.total_reviews += *count as usize;
136            summary.daily.push(DailyStats {
137                date: date.clone(),
138                reviews: *count as usize,
139                time_seconds: 0, // Would need review data for this
140            });
141        }
142
143        if !recent.is_empty() {
144            summary.avg_reviews_per_day = summary.total_reviews as f64 / recent.len() as f64;
145        }
146
147        // Get unique cards reviewed
148        if deck != "*" {
149            let query = format!("deck:\"{}\" rated:{}", deck, days);
150            let cards = self.client.cards().find(&query).await?;
151            summary.unique_cards = cards.len();
152        }
153
154        Ok(summary)
155    }
156
157    /// Find problem cards (leeches).
158    ///
159    /// # Arguments
160    ///
161    /// * `query` - Anki search query to filter cards
162    /// * `criteria` - Criteria for identifying problems
163    ///
164    /// # Example
165    ///
166    /// ```no_run
167    /// # use ankit_engine::Engine;
168    /// # use ankit_engine::analyze::ProblemCriteria;
169    /// # async fn example() -> ankit_engine::Result<()> {
170    /// let engine = Engine::new();
171    /// let problems = engine.analyze()
172    ///     .find_problems("deck:Japanese", ProblemCriteria::default())
173    ///     .await?;
174    /// for card in problems {
175    ///     println!("Problem card: {} - {:?}", card.front, card.reason);
176    /// }
177    /// # Ok(())
178    /// # }
179    /// ```
180    pub async fn find_problems(
181        &self,
182        query: &str,
183        criteria: ProblemCriteria,
184    ) -> Result<Vec<ProblemCard>> {
185        let card_ids = self.client.cards().find(query).await?;
186
187        if card_ids.is_empty() {
188            return Ok(Vec::new());
189        }
190
191        let cards = self.client.cards().info(&card_ids).await?;
192        let mut problems = Vec::new();
193
194        for card in cards {
195            let reason = if card.lapses >= criteria.min_lapses {
196                Some(ProblemReason::HighLapseCount(card.lapses))
197            } else if card.ease_factor > 0 && card.ease_factor <= criteria.max_ease {
198                Some(ProblemReason::LowEase(card.ease_factor))
199            } else if card.reps >= criteria.min_reps_for_retention
200                && card.interval <= criteria.max_interval_for_retention
201            {
202                Some(ProblemReason::PoorRetention {
203                    reps: card.reps,
204                    interval: card.interval,
205                })
206            } else {
207                None
208            };
209
210            if let Some(reason) = reason {
211                // Get the note to get the front field
212                let note_info = self.client.notes().info(&[card.note_id]).await?;
213                let front = note_info
214                    .first()
215                    .and_then(|n| n.fields.values().next())
216                    .map(|f| f.value.clone())
217                    .unwrap_or_default();
218
219                problems.push(ProblemCard {
220                    card_id: card.card_id,
221                    note_id: card.note_id,
222                    lapses: card.lapses,
223                    reps: card.reps,
224                    ease: card.ease_factor,
225                    interval: card.interval,
226                    deck_name: card.deck_name.clone(),
227                    front,
228                    reason,
229                });
230            }
231        }
232
233        Ok(problems)
234    }
235
236    /// Get retention statistics for a deck.
237    ///
238    /// # Arguments
239    ///
240    /// * `deck` - Deck to analyze
241    ///
242    /// # Example
243    ///
244    /// ```no_run
245    /// # use ankit_engine::Engine;
246    /// # async fn example() -> ankit_engine::Result<()> {
247    /// let engine = Engine::new();
248    /// let retention = engine.analyze().retention_stats("Japanese").await?;
249    /// println!("Average ease: {}%", retention.avg_ease / 10);
250    /// # Ok(())
251    /// # }
252    /// ```
253    pub async fn retention_stats(&self, deck: &str) -> Result<RetentionStats> {
254        let query = format!("deck:\"{}\" is:review", deck);
255        let card_ids = self.client.cards().find(&query).await?;
256
257        if card_ids.is_empty() {
258            return Ok(RetentionStats::default());
259        }
260
261        let cards = self.client.cards().info(&card_ids).await?;
262        let ease_factors = self.client.cards().get_ease(&card_ids).await?;
263
264        let total_lapses: i64 = cards.iter().map(|c| c.lapses).sum();
265        let total_reps: i64 = cards.iter().map(|c| c.reps).sum();
266        let avg_ease: i64 = if !ease_factors.is_empty() {
267            ease_factors.iter().sum::<i64>() / ease_factors.len() as i64
268        } else {
269            0
270        };
271        let avg_interval: i64 = if !cards.is_empty() {
272            cards.iter().map(|c| c.interval).sum::<i64>() / cards.len() as i64
273        } else {
274            0
275        };
276
277        Ok(RetentionStats {
278            total_cards: cards.len(),
279            total_reviews: total_reps as usize,
280            total_lapses: total_lapses as usize,
281            avg_ease,
282            avg_interval,
283            retention_rate: if total_reps > 0 {
284                1.0 - (total_lapses as f64 / total_reps as f64)
285            } else {
286                0.0
287            },
288        })
289    }
290
291    /// Perform a comprehensive audit of a deck.
292    ///
293    /// Returns detailed information about deck contents including card counts,
294    /// tag distribution, empty fields, duplicates, and scheduling state.
295    ///
296    /// # Arguments
297    ///
298    /// * `deck` - Deck name to audit
299    ///
300    /// # Example
301    ///
302    /// ```no_run
303    /// # use ankit_engine::Engine;
304    /// # async fn example() -> ankit_engine::Result<()> {
305    /// let engine = Engine::new();
306    /// let audit = engine.analyze().deck_audit("Japanese").await?;
307    ///
308    /// println!("Deck: {}", audit.deck);
309    /// println!("Total cards: {}", audit.total_cards);
310    /// println!("Total notes: {}", audit.total_notes);
311    /// println!("Leeches: {}", audit.leech_count);
312    /// println!("Suspended: {}", audit.suspended_count);
313    /// println!("New: {}, Learning: {}, Review: {}",
314    ///     audit.new_cards, audit.learning_cards, audit.review_cards);
315    ///
316    /// for (model, count) in &audit.cards_by_model {
317    ///     println!("  {}: {} cards", model, count);
318    /// }
319    /// # Ok(())
320    /// # }
321    /// ```
322    pub async fn deck_audit(&self, deck: &str) -> Result<DeckAudit> {
323        let mut audit = DeckAudit {
324            deck: deck.to_string(),
325            ..Default::default()
326        };
327
328        let query = format!("deck:\"{}\"", deck);
329
330        // Get all cards in deck
331        let card_ids = self.client.cards().find(&query).await?;
332        audit.total_cards = card_ids.len();
333
334        if card_ids.is_empty() {
335            return Ok(audit);
336        }
337
338        // Get card info for scheduling and model analysis
339        let cards = self.client.cards().info(&card_ids).await?;
340
341        // Count by model and scheduling state
342        let mut ease_sum: i64 = 0;
343        let mut ease_count: usize = 0;
344
345        for card in &cards {
346            // Count by model
347            *audit
348                .cards_by_model
349                .entry(card.model_name.clone())
350                .or_insert(0) += 1;
351
352            // Count by scheduling state (card_type: 0=new, 1=learning, 2=review, 3=relearning)
353            match card.card_type {
354                0 => audit.new_cards += 1,
355                1 | 3 => audit.learning_cards += 1,
356                2 => audit.review_cards += 1,
357                _ => {}
358            }
359
360            // Check suspended (queue == -1)
361            if card.queue == -1 {
362                audit.suspended_count += 1;
363            }
364
365            // Check leech (high lapses, default threshold 8)
366            if card.lapses >= 8 {
367                audit.leech_count += 1;
368            }
369
370            // Accumulate ease for average
371            if card.ease_factor > 0 {
372                ease_sum += card.ease_factor;
373                ease_count += 1;
374            }
375        }
376
377        // Calculate average ease
378        if ease_count > 0 {
379            audit.average_ease = ease_sum as f64 / ease_count as f64;
380        }
381
382        // Get all notes in deck
383        let note_ids = self.client.notes().find(&query).await?;
384        audit.total_notes = note_ids.len();
385
386        if !note_ids.is_empty() {
387            let notes = self.client.notes().info(&note_ids).await?;
388
389            // Tag distribution and untagged count
390            for note in &notes {
391                if note.tags.is_empty() {
392                    audit.untagged_notes += 1;
393                } else {
394                    for tag in &note.tags {
395                        *audit.tag_distribution.entry(tag.clone()).or_insert(0) += 1;
396                    }
397                }
398            }
399
400            // Empty field analysis - collect all field names and check which are empty
401            let mut field_names: HashMap<String, bool> = HashMap::new();
402            for note in &notes {
403                for (field_name, field_value) in &note.fields {
404                    field_names.insert(field_name.clone(), true);
405                    if field_value.value.trim().is_empty() {
406                        *audit
407                            .empty_field_counts
408                            .entry(field_name.clone())
409                            .or_insert(0) += 1;
410                    }
411                }
412            }
413
414            // Duplicate detection - use first field as key
415            let mut seen_values: HashMap<String, usize> = HashMap::new();
416            for note in &notes {
417                // Get the first field value (sorted by order)
418                if let Some(first_field) = note
419                    .fields
420                    .values()
421                    .min_by_key(|f| f.order)
422                    .map(|f| f.value.trim().to_lowercase())
423                {
424                    if !first_field.is_empty() {
425                        *seen_values.entry(first_field).or_insert(0) += 1;
426                    }
427                }
428            }
429
430            // Count duplicates (values that appear more than once)
431            audit.duplicate_count = seen_values.values().filter(|&&count| count > 1).count();
432        }
433
434        Ok(audit)
435    }
436
437    /// Generate a comprehensive study report.
438    ///
439    /// Combines multiple statistics into a single overview including activity summary,
440    /// performance metrics, problem cards, and upcoming workload.
441    ///
442    /// # Arguments
443    ///
444    /// * `deck` - Deck to analyze (use "*" for all decks)
445    /// * `days` - Number of days to include in the report
446    ///
447    /// # Example
448    ///
449    /// ```no_run
450    /// # use ankit_engine::Engine;
451    /// # async fn example() -> ankit_engine::Result<()> {
452    /// let engine = Engine::new();
453    /// let report = engine.analyze().study_report("Japanese", 7).await?;
454    ///
455    /// println!("Study Report for {}", report.deck);
456    /// println!("Reviews: {} ({:.1}/day)", report.total_reviews, report.average_reviews_per_day);
457    /// println!("Retention: {:.1}%", report.retention_rate * 100.0);
458    /// println!("Study streak: {} days", report.study_streak);
459    /// println!("Leeches: {}", report.leeches.len());
460    /// println!("Due tomorrow: {}", report.due_tomorrow);
461    /// # Ok(())
462    /// # }
463    /// ```
464    pub async fn study_report(&self, deck: &str, days: u32) -> Result<StudyReport> {
465        let mut report = StudyReport {
466            deck: deck.to_string(),
467            period_days: days,
468            ..Default::default()
469        };
470
471        // Get daily review counts
472        let daily_reviews = self.client.statistics().cards_reviewed_by_day().await?;
473        let take_days = days as usize;
474        let recent: Vec<_> = daily_reviews.into_iter().take(take_days).collect();
475
476        // Calculate activity metrics
477        for (date, count) in &recent {
478            report.total_reviews += *count as usize;
479            report.daily_stats.push(ReportDailyStats {
480                date: date.clone(),
481                reviews: *count as usize,
482            });
483        }
484
485        if !recent.is_empty() {
486            report.average_reviews_per_day = report.total_reviews as f64 / recent.len() as f64;
487        }
488
489        // Calculate study streak (consecutive days with reviews from most recent)
490        report.study_streak = recent.iter().take_while(|(_, count)| *count > 0).count() as u32;
491
492        // Build query for deck-specific stats
493        let review_query = if deck == "*" {
494            "is:review".to_string()
495        } else {
496            format!("deck:\"{}\" is:review", deck)
497        };
498
499        let review_card_ids = self.client.cards().find(&review_query).await?;
500
501        if !review_card_ids.is_empty() {
502            let cards = self.client.cards().info(&review_card_ids).await?;
503
504            // Calculate retention and ease
505            let total_lapses: i64 = cards.iter().map(|c| c.lapses).sum();
506            let total_reps: i64 = cards.iter().map(|c| c.reps).sum();
507
508            if total_reps > 0 {
509                report.retention_rate = 1.0 - (total_lapses as f64 / total_reps as f64);
510            }
511
512            let ease_values: Vec<i64> = cards
513                .iter()
514                .filter(|c| c.ease_factor > 0)
515                .map(|c| c.ease_factor)
516                .collect();
517
518            if !ease_values.is_empty() {
519                report.average_ease =
520                    ease_values.iter().sum::<i64>() as f64 / ease_values.len() as f64;
521            }
522
523            // Find problem cards
524            for card in &cards {
525                // Leeches: 8+ lapses (Anki default)
526                if card.lapses >= 8 {
527                    report.leeches.push(card.card_id);
528                }
529                // Low ease: below 200% (2000)
530                if card.ease_factor > 0 && card.ease_factor < 2000 {
531                    report.low_ease_cards.push(card.card_id);
532                }
533            }
534
535            // Count relearning cards
536            report.relearning_cards = cards.iter().filter(|c| c.card_type == 3).count();
537        }
538
539        // Get cards studied in period (rated:N query)
540        if deck != "*" {
541            let rated_query = format!("deck:\"{}\" rated:{}", deck, days);
542            let rated_cards = self.client.cards().find(&rated_query).await?;
543
544            if !rated_cards.is_empty() {
545                let card_infos = self.client.cards().info(&rated_cards).await?;
546
547                // Count by type
548                for card in &card_infos {
549                    match card.card_type {
550                        0 => report.new_cards_studied += 1,
551                        2 => report.review_cards_studied += 1,
552                        _ => {}
553                    }
554                }
555            }
556        }
557
558        // Get upcoming workload
559        let due_tomorrow_query = if deck == "*" {
560            "prop:due=1".to_string()
561        } else {
562            format!("deck:\"{}\" prop:due=1", deck)
563        };
564        let due_tomorrow_cards = self.client.cards().find(&due_tomorrow_query).await?;
565        report.due_tomorrow = due_tomorrow_cards.len();
566
567        let due_week_query = if deck == "*" {
568            "prop:due<=7".to_string()
569        } else {
570            format!("deck:\"{}\" prop:due<=7", deck)
571        };
572        let due_week_cards = self.client.cards().find(&due_week_query).await?;
573        report.due_this_week = due_week_cards.len();
574
575        Ok(report)
576    }
577
578    /// Compare two decks for overlap and differences.
579    ///
580    /// Analyzes notes in both decks based on a key field, identifying:
581    /// - Notes unique to each deck
582    /// - Exact matches (identical key field values)
583    /// - Similar notes (fuzzy matching above threshold)
584    ///
585    /// # Arguments
586    ///
587    /// * `deck_a` - Name of the first deck
588    /// * `deck_b` - Name of the second deck
589    /// * `options` - Comparison options (key field and similarity threshold)
590    ///
591    /// # Example
592    ///
593    /// ```no_run
594    /// # use ankit_engine::Engine;
595    /// # use ankit_engine::analyze::CompareOptions;
596    /// # async fn example() -> ankit_engine::Result<()> {
597    /// let engine = Engine::new();
598    ///
599    /// let comparison = engine.analyze()
600    ///     .compare_decks("Japanese::Core", "Japanese::Extra", CompareOptions {
601    ///         key_field: "Front".to_string(),
602    ///         similarity_threshold: 0.85,
603    ///     })
604    ///     .await?;
605    ///
606    /// println!("Only in Core: {}", comparison.only_in_a.len());
607    /// println!("Only in Extra: {}", comparison.only_in_b.len());
608    /// println!("Exact matches: {}", comparison.exact_matches.len());
609    /// println!("Similar: {}", comparison.similar.len());
610    ///
611    /// for pair in &comparison.similar {
612    ///     println!("  {:.0}% similar: '{}' vs '{}'",
613    ///         pair.similarity * 100.0,
614    ///         pair.note_a.key_value,
615    ///         pair.note_b.key_value);
616    /// }
617    /// # Ok(())
618    /// # }
619    /// ```
620    pub async fn compare_decks(
621        &self,
622        deck_a: &str,
623        deck_b: &str,
624        options: CompareOptions,
625    ) -> Result<DeckComparison> {
626        let mut comparison = DeckComparison {
627            deck_a: deck_a.to_string(),
628            deck_b: deck_b.to_string(),
629            key_field: options.key_field.clone(),
630            similarity_threshold: options.similarity_threshold,
631            ..Default::default()
632        };
633
634        // Get notes from both decks
635        let query_a = format!("deck:\"{}\"", deck_a);
636        let query_b = format!("deck:\"{}\"", deck_b);
637
638        let note_ids_a = self.client.notes().find(&query_a).await?;
639        let note_ids_b = self.client.notes().find(&query_b).await?;
640
641        if note_ids_a.is_empty() && note_ids_b.is_empty() {
642            return Ok(comparison);
643        }
644
645        // Get note info
646        let notes_a = if note_ids_a.is_empty() {
647            Vec::new()
648        } else {
649            self.client.notes().info(&note_ids_a).await?
650        };
651
652        let notes_b = if note_ids_b.is_empty() {
653            Vec::new()
654        } else {
655            self.client.notes().info(&note_ids_b).await?
656        };
657
658        // Extract key field values
659        let extract_key = |note: &ankit::NoteInfo| -> Option<(i64, String, Vec<String>)> {
660            note.fields
661                .get(&options.key_field)
662                .map(|f| (note.note_id, f.value.trim().to_string(), note.tags.clone()))
663        };
664
665        let keys_a: Vec<_> = notes_a.iter().filter_map(extract_key).collect();
666        let keys_b: Vec<_> = notes_b.iter().filter_map(extract_key).collect();
667
668        // Build lookup map for deck B (for exact matching from A)
669        let map_b: HashMap<String, (i64, Vec<String>)> = keys_b
670            .iter()
671            .map(|(id, key, tags)| (key.to_lowercase(), (*id, tags.clone())))
672            .collect();
673
674        // Track which notes have been matched
675        let mut matched_in_a: std::collections::HashSet<i64> = std::collections::HashSet::new();
676        let mut matched_in_b: std::collections::HashSet<i64> = std::collections::HashSet::new();
677
678        // Find exact matches
679        for (note_id_a, key_a, tags_a) in &keys_a {
680            let key_lower = key_a.to_lowercase();
681            if let Some((note_id_b, tags_b)) = map_b.get(&key_lower) {
682                matched_in_a.insert(*note_id_a);
683                matched_in_b.insert(*note_id_b);
684
685                comparison.exact_matches.push((
686                    ComparisonNote {
687                        note_id: *note_id_a,
688                        key_value: key_a.clone(),
689                        tags: tags_a.clone(),
690                    },
691                    ComparisonNote {
692                        note_id: *note_id_b,
693                        key_value: key_a.clone(), // Same value
694                        tags: tags_b.clone(),
695                    },
696                ));
697            }
698        }
699
700        // Find similar matches (only for unmatched notes)
701        if options.similarity_threshold < 1.0 {
702            for (note_id_a, key_a, tags_a) in &keys_a {
703                if matched_in_a.contains(note_id_a) {
704                    continue;
705                }
706
707                for (note_id_b, key_b, tags_b) in &keys_b {
708                    if matched_in_b.contains(note_id_b) {
709                        continue;
710                    }
711
712                    let similarity = string_similarity(key_a, key_b);
713                    if similarity >= options.similarity_threshold {
714                        matched_in_a.insert(*note_id_a);
715                        matched_in_b.insert(*note_id_b);
716
717                        comparison.similar.push(SimilarPair {
718                            note_a: ComparisonNote {
719                                note_id: *note_id_a,
720                                key_value: key_a.clone(),
721                                tags: tags_a.clone(),
722                            },
723                            note_b: ComparisonNote {
724                                note_id: *note_id_b,
725                                key_value: key_b.clone(),
726                                tags: tags_b.clone(),
727                            },
728                            similarity,
729                        });
730
731                        break; // Move to next note in A
732                    }
733                }
734            }
735        }
736
737        // Collect unmatched notes
738        for (note_id_a, key_a, tags_a) in &keys_a {
739            if !matched_in_a.contains(note_id_a) {
740                comparison.only_in_a.push(ComparisonNote {
741                    note_id: *note_id_a,
742                    key_value: key_a.clone(),
743                    tags: tags_a.clone(),
744                });
745            }
746        }
747
748        for (note_id_b, key_b, tags_b) in &keys_b {
749            if !matched_in_b.contains(note_id_b) {
750                comparison.only_in_b.push(ComparisonNote {
751                    note_id: *note_id_b,
752                    key_value: key_b.clone(),
753                    tags: tags_b.clone(),
754                });
755            }
756        }
757
758        Ok(comparison)
759    }
760
761    /// Generate a study plan with recommendations.
762    ///
763    /// Creates a plan for a study session based on due cards, new cards,
764    /// and target study time. Provides recommendations for optimizing
765    /// the session.
766    ///
767    /// # Arguments
768    ///
769    /// * `deck` - Deck to plan for
770    /// * `options` - Planning options (target time, new card ratio, etc.)
771    ///
772    /// # Example
773    ///
774    /// ```no_run
775    /// # use ankit_engine::Engine;
776    /// # use ankit_engine::analyze::PlanOptions;
777    /// # async fn example() -> ankit_engine::Result<()> {
778    /// let engine = Engine::new();
779    ///
780    /// let plan = engine.analyze()
781    ///     .study_plan("Japanese", PlanOptions {
782    ///         target_time_minutes: 30,
783    ///         new_card_ratio: 0.2,
784    ///         prioritize_leeches: true,
785    ///         ..PlanOptions::default()
786    ///     })
787    ///     .await?;
788    ///
789    /// println!("Estimated time: {} minutes", plan.estimated_time);
790    /// println!("Reviews: {}, New: {}", plan.review_count, plan.new_count);
791    ///
792    /// for rec in &plan.recommendations {
793    ///     println!("- {}", rec);
794    /// }
795    /// # Ok(())
796    /// # }
797    /// ```
798    pub async fn study_plan(&self, deck: &str, options: PlanOptions) -> Result<StudyPlan> {
799        let mut plan = StudyPlan {
800            deck: deck.to_string(),
801            ..Default::default()
802        };
803
804        // Get due cards
805        let due_query = format!("deck:\"{}\" is:due -is:suspended", deck);
806        let due_card_ids = self.client.cards().find(&due_query).await?;
807        plan.total_due = due_card_ids.len();
808
809        // Get new cards
810        let new_query = format!("deck:\"{}\" is:new -is:suspended", deck);
811        let new_card_ids = self.client.cards().find(&new_query).await?;
812        plan.total_new_available = new_card_ids.len();
813
814        if due_card_ids.is_empty() && new_card_ids.is_empty() {
815            plan.recommendations
816                .push("No cards to study! Consider adding new material.".to_string());
817            return Ok(plan);
818        }
819
820        // Get card info for prioritization
821        let due_cards = if due_card_ids.is_empty() {
822            Vec::new()
823        } else {
824            self.client.cards().info(&due_card_ids).await?
825        };
826
827        // Calculate target card counts based on time
828        let total_seconds = options.target_time_minutes * 60;
829
830        // First, identify leeches and calculate how many we can fit
831        let mut leech_ids: Vec<i64> = Vec::new();
832        let mut regular_review_ids: Vec<i64> = Vec::new();
833
834        for card in &due_cards {
835            if card.lapses >= options.leech_threshold {
836                leech_ids.push(card.card_id);
837            } else {
838                regular_review_ids.push(card.card_id);
839            }
840        }
841
842        plan.leech_count = leech_ids.len();
843
844        // Calculate card allocation
845        let mut remaining_seconds = total_seconds;
846        let mut selected_reviews: Vec<i64> = Vec::new();
847        let mut selected_new: Vec<i64> = Vec::new();
848
849        // If prioritizing leeches, add them first
850        if options.prioritize_leeches && !leech_ids.is_empty() {
851            let leech_time = leech_ids.len() as u32 * options.seconds_per_review_card;
852            if leech_time <= remaining_seconds {
853                selected_reviews.extend(&leech_ids);
854                remaining_seconds -= leech_time;
855            } else {
856                // Can only fit some leeches
857                let max_leeches = (remaining_seconds / options.seconds_per_review_card) as usize;
858                selected_reviews.extend(leech_ids.iter().take(max_leeches));
859                remaining_seconds = 0;
860            }
861        }
862
863        // Calculate remaining time split between new and review cards
864        if remaining_seconds > 0 {
865            // Target ratio of new cards
866            let new_time_budget = (remaining_seconds as f64 * options.new_card_ratio) as u32;
867            let review_time_budget = remaining_seconds - new_time_budget;
868
869            // How many of each can we fit?
870            let max_new = (new_time_budget / options.seconds_per_new_card) as usize;
871            let max_reviews = (review_time_budget / options.seconds_per_review_card) as usize;
872
873            // Select regular reviews (excluding already-selected leeches)
874            let available_reviews: Vec<i64> = if options.prioritize_leeches {
875                regular_review_ids.clone()
876            } else {
877                due_card_ids.clone()
878            };
879
880            let reviews_to_add = available_reviews.iter().take(max_reviews);
881            selected_reviews.extend(reviews_to_add);
882
883            // Select new cards
884            let new_to_add = new_card_ids.iter().take(max_new);
885            selected_new.extend(new_to_add);
886        }
887
888        // Build the suggested order
889        // Order: Leeches first (if prioritized), then reviews, then new
890        let mut ordered_cards: Vec<(i64, CardPriority)> = Vec::new();
891
892        if options.prioritize_leeches {
893            for &id in &leech_ids {
894                if selected_reviews.contains(&id) {
895                    ordered_cards.push((id, CardPriority::Leech));
896                }
897            }
898        }
899
900        for &id in &selected_reviews {
901            if !leech_ids.contains(&id) || !options.prioritize_leeches {
902                ordered_cards.push((id, CardPriority::DueReview));
903            }
904        }
905
906        for &id in &selected_new {
907            ordered_cards.push((id, CardPriority::New));
908        }
909
910        // Sort by priority
911        ordered_cards.sort_by_key(|(_, priority)| *priority);
912
913        plan.suggested_order = ordered_cards.into_iter().map(|(id, _)| id).collect();
914        plan.review_count = selected_reviews.len();
915        plan.new_count = selected_new.len();
916
917        // Calculate estimated time
918        let review_time = plan.review_count as u32 * options.seconds_per_review_card;
919        let new_time = plan.new_count as u32 * options.seconds_per_new_card;
920        plan.estimated_time = (review_time + new_time) / 60;
921
922        // Generate recommendations
923        if plan.leech_count > 0 {
924            plan.recommendations.push(format!(
925                "You have {} leech cards that need extra attention.",
926                plan.leech_count
927            ));
928        }
929
930        if plan.total_due > plan.review_count {
931            plan.recommendations.push(format!(
932                "Only {} of {} due cards fit in your target time.",
933                plan.review_count, plan.total_due
934            ));
935        }
936
937        if plan.total_new_available > 0 && plan.new_count == 0 {
938            plan.recommendations
939                .push("No time for new cards today. Consider increasing study time.".to_string());
940        } else if plan.new_count > 0 {
941            plan.recommendations.push(format!(
942                "Introducing {} new cards ({:.0}% of session).",
943                plan.new_count,
944                (plan.new_count as f64 / (plan.review_count + plan.new_count) as f64) * 100.0
945            ));
946        }
947
948        if plan.review_count + plan.new_count == 0 {
949            plan.recommendations
950                .push("No cards fit the target time. Try increasing study time.".to_string());
951        }
952
953        let actual_ratio = if plan.review_count + plan.new_count > 0 {
954            plan.new_count as f64 / (plan.review_count + plan.new_count) as f64
955        } else {
956            0.0
957        };
958
959        if actual_ratio < options.new_card_ratio * 0.5 && plan.total_new_available > 0 {
960            plan.recommendations.push(
961                "New card ratio is below target. You may be accumulating a review backlog."
962                    .to_string(),
963            );
964        }
965
966        Ok(plan)
967    }
968}
969
970/// Calculate string similarity using normalized Levenshtein distance.
971///
972/// Returns a value between 0.0 (completely different) and 1.0 (identical).
973fn string_similarity(a: &str, b: &str) -> f64 {
974    let a_lower = a.to_lowercase();
975    let b_lower = b.to_lowercase();
976
977    if a_lower == b_lower {
978        return 1.0;
979    }
980
981    if a_lower.is_empty() || b_lower.is_empty() {
982        return 0.0;
983    }
984
985    let distance = levenshtein_distance(&a_lower, &b_lower);
986    let max_len = a_lower.chars().count().max(b_lower.chars().count());
987
988    1.0 - (distance as f64 / max_len as f64)
989}
990
/// Calculate the Levenshtein (edit) distance between two strings.
///
/// The distance is the minimum number of single-`char` insertions, deletions
/// and substitutions needed to turn `a` into `b`. It is measured in Unicode
/// scalar values (`char`s), not bytes, and the comparison is case-sensitive.
///
/// Only two rows of the dynamic-programming table are kept, and the shorter
/// input is placed on the row axis, so extra memory is proportional to the
/// shorter string.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();

    // The distance is symmetric, so iterate rows over the longer input and
    // keep the (smaller) row buffers sized by the shorter one.
    let (outer, inner) = if a_chars.len() >= b_chars.len() {
        (&a_chars, &b_chars)
    } else {
        (&b_chars, &a_chars)
    };

    // Turning an empty string into `outer` takes one insertion per char.
    if inner.is_empty() {
        return outer.len();
    }

    let width = inner.len();
    // `prev[j]` is the distance between the processed prefix of `outer` and
    // the first `j` chars of `inner`; it starts as the row for an empty prefix.
    let mut prev: Vec<usize> = (0..=width).collect();
    let mut curr = vec![0usize; width + 1];

    for (i, &outer_ch) in outer.iter().enumerate() {
        // Matching `i + 1` chars of `outer` against nothing costs `i + 1` deletions.
        curr[0] = i + 1;

        for (j, &inner_ch) in inner.iter().enumerate() {
            let cost = usize::from(outer_ch != inner_ch);

            curr[j + 1] = (prev[j + 1] + 1) // deletion
                .min(curr[j] + 1) // insertion
                .min(prev[j] + cost); // substitution
        }

        std::mem::swap(&mut prev, &mut curr);
    }

    // After the final swap the last completed row lives in `prev`.
    prev[width]
}
1030
1031/// Comprehensive study report combining multiple statistics.
1032///
1033/// Provides a complete overview of study activity, performance, problem areas,
1034/// and upcoming workload for a deck over a specified time period.
1035#[derive(Debug, Clone, Default, Serialize)]
1036pub struct StudyReport {
1037    /// The deck name (or "*" for all decks).
1038    pub deck: String,
1039    /// Number of days covered by this report.
1040    pub period_days: u32,
1041
1042    // Activity summary
1043    /// Total number of reviews in the period.
1044    pub total_reviews: usize,
1045    /// Total time spent studying in minutes.
1046    pub total_time_minutes: u64,
1047    /// Average reviews per day.
1048    pub average_reviews_per_day: f64,
1049    /// Consecutive days with at least one review.
1050    pub study_streak: u32,
1051
1052    // Performance metrics
1053    /// Estimated retention rate (0.0 - 1.0).
1054    pub retention_rate: f64,
1055    /// Average ease factor (percentage * 10, e.g., 2500 = 250%).
1056    pub average_ease: f64,
1057
1058    // Cards reviewed breakdown
1059    /// Number of new cards studied in the period.
1060    pub new_cards_studied: usize,
1061    /// Number of review cards studied in the period.
1062    pub review_cards_studied: usize,
1063    /// Number of cards in relearning state.
1064    pub relearning_cards: usize,
1065
1066    // Problem areas (card IDs)
1067    /// Card IDs flagged as leeches (high lapses).
1068    pub leeches: Vec<i64>,
1069    /// Card IDs with low ease factor (below 200%).
1070    pub low_ease_cards: Vec<i64>,
1071
1072    // Upcoming workload
1073    /// Number of cards due tomorrow.
1074    pub due_tomorrow: usize,
1075    /// Number of cards due within the next 7 days.
1076    pub due_this_week: usize,
1077
1078    // Daily breakdown
1079    /// Statistics for each day in the period.
1080    pub daily_stats: Vec<ReportDailyStats>,
1081}
1082
1083/// Daily statistics for a study report.
1084#[derive(Debug, Clone, Default, Serialize)]
1085pub struct ReportDailyStats {
1086    /// Date in YYYY-MM-DD format.
1087    pub date: String,
1088    /// Number of reviews on this day.
1089    pub reviews: usize,
1090}
1091
/// Options for comparing two decks.
#[derive(Debug, Clone)]
pub struct CompareOptions {
    /// Field name to use as the comparison key (e.g., "Front").
    pub key_field: String,
    /// Similarity threshold for fuzzy matching (0.0 - 1.0).
    /// Cards with similarity >= this value are considered similar.
    /// Set to 1.0 for exact matches only.
    pub similarity_threshold: f64,
}

impl Default for CompareOptions {
    /// Compare on the "Front" field with a 0.9 similarity threshold.
    fn default() -> Self {
        Self {
            key_field: "Front".to_string(),
            similarity_threshold: 0.9,
        }
    }
}
1111
1112/// Result of comparing two decks.
1113#[derive(Debug, Clone, Default, Serialize)]
1114pub struct DeckComparison {
1115    /// Name of the first deck.
1116    pub deck_a: String,
1117    /// Name of the second deck.
1118    pub deck_b: String,
1119    /// Field used for comparison.
1120    pub key_field: String,
1121    /// Similarity threshold used.
1122    pub similarity_threshold: f64,
1123
1124    /// Notes only in deck A (not in B).
1125    pub only_in_a: Vec<ComparisonNote>,
1126    /// Notes only in deck B (not in A).
1127    pub only_in_b: Vec<ComparisonNote>,
1128    /// Notes with exact matching key field values.
1129    pub exact_matches: Vec<(ComparisonNote, ComparisonNote)>,
1130    /// Notes with similar (but not exact) key field values.
1131    pub similar: Vec<SimilarPair>,
1132}
1133
1134/// A note in a comparison result.
1135#[derive(Debug, Clone, Serialize)]
1136pub struct ComparisonNote {
1137    /// The note ID.
1138    pub note_id: i64,
1139    /// The value of the key field.
1140    pub key_value: String,
1141    /// The note's tags.
1142    pub tags: Vec<String>,
1143}
1144
1145/// A pair of similar notes from two decks.
1146#[derive(Debug, Clone, Serialize)]
1147pub struct SimilarPair {
1148    /// Note from deck A.
1149    pub note_a: ComparisonNote,
1150    /// Note from deck B.
1151    pub note_b: ComparisonNote,
1152    /// Similarity score (0.0 - 1.0).
1153    pub similarity: f64,
1154}
1155
1156/// Retention statistics for a deck.
1157#[derive(Debug, Clone, Default, Serialize)]
1158pub struct RetentionStats {
1159    /// Total number of review cards.
1160    pub total_cards: usize,
1161    /// Total number of reviews.
1162    pub total_reviews: usize,
1163    /// Total number of lapses.
1164    pub total_lapses: usize,
1165    /// Average ease factor (percentage * 10).
1166    pub avg_ease: i64,
1167    /// Average interval in days.
1168    pub avg_interval: i64,
1169    /// Estimated retention rate (0.0 - 1.0).
1170    pub retention_rate: f64,
1171}
1172
1173/// Comprehensive audit of a deck's contents and health.
1174///
1175/// Combines multiple analyses into a single report including card counts,
1176/// tag distribution, empty fields, duplicates, and scheduling state.
1177#[derive(Debug, Clone, Default, Serialize)]
1178pub struct DeckAudit {
1179    /// The deck name.
1180    pub deck: String,
1181    /// Total number of cards.
1182    pub total_cards: usize,
1183    /// Total number of notes.
1184    pub total_notes: usize,
1185
1186    // Card counts by model
1187    /// Number of cards per note type (model).
1188    pub cards_by_model: HashMap<String, usize>,
1189
1190    // Tag coverage
1191    /// Number of notes per tag.
1192    pub tag_distribution: HashMap<String, usize>,
1193    /// Number of notes without any tags.
1194    pub untagged_notes: usize,
1195
1196    // Field analysis
1197    /// Number of notes with each field empty (field name -> count).
1198    pub empty_field_counts: HashMap<String, usize>,
1199
1200    // Duplicates
1201    /// Number of potential duplicate notes detected.
1202    pub duplicate_count: usize,
1203
1204    // Problem cards
1205    /// Number of leech cards (high lapses).
1206    pub leech_count: usize,
1207    /// Number of suspended cards.
1208    pub suspended_count: usize,
1209
1210    // Scheduling summary
1211    /// Number of new cards (never reviewed).
1212    pub new_cards: usize,
1213    /// Number of cards in learning phase.
1214    pub learning_cards: usize,
1215    /// Number of review cards.
1216    pub review_cards: usize,
1217    /// Average ease factor (percentage * 10, e.g., 2500 = 250%).
1218    pub average_ease: f64,
1219}
1220
/// Options for generating a study plan.
#[derive(Debug, Clone)]
pub struct PlanOptions {
    /// Target study time in minutes.
    pub target_time_minutes: u32,
    /// Share of the session reserved for new cards (0.0 - 1.0).
    ///
    /// The planner applies this ratio to the *time* budget left after any
    /// prioritized leeches, not to the card count: 0.2 means 20% of the
    /// remaining seconds go to new cards. The new-card budget is subtracted
    /// from the remaining time, so keep the value within 0.0 - 1.0.
    pub new_card_ratio: f64,
    /// Whether to prioritize leech cards (cards with high lapses).
    pub prioritize_leeches: bool,
    /// Estimated seconds per new card.
    ///
    /// Must be non-zero: the planner divides the new-card time budget by it.
    pub seconds_per_new_card: u32,
    /// Estimated seconds per review card.
    ///
    /// Must be non-zero: the planner divides the review time budget by it.
    pub seconds_per_review_card: u32,
    /// Leech threshold (minimum lapses to consider a card a leech).
    ///
    /// A card counts as a leech when its lapses are greater than or equal to
    /// this value.
    pub leech_threshold: i64,
}

impl Default for PlanOptions {
    /// A 30-minute session with 20% of the time for new cards and leeches
    /// (8 or more lapses) prioritized.
    fn default() -> Self {
        Self {
            target_time_minutes: 30,
            new_card_ratio: 0.2,
            prioritize_leeches: true,
            seconds_per_new_card: 30,   // 30 seconds for new cards
            seconds_per_review_card: 8, // 8 seconds for reviews
            leech_threshold: 8,
        }
    }
}
1250
1251/// A generated study plan with recommendations.
1252#[derive(Debug, Clone, Default, Serialize)]
1253pub struct StudyPlan {
1254    /// The deck name.
1255    pub deck: String,
1256    /// Estimated time to complete the plan in minutes.
1257    pub estimated_time: u32,
1258    /// Number of review cards in the plan.
1259    pub review_count: usize,
1260    /// Number of new cards in the plan.
1261    pub new_count: usize,
1262    /// Number of leech cards in the plan.
1263    pub leech_count: usize,
1264    /// Total cards due today.
1265    pub total_due: usize,
1266    /// Total new cards available.
1267    pub total_new_available: usize,
1268    /// Recommendations for the study session.
1269    pub recommendations: Vec<String>,
1270    /// Suggested card IDs in study order.
1271    pub suggested_order: Vec<i64>,
1272}
1273
/// Priority category for a card in the study plan.
///
/// Variant order is significant: the derived `Ord` ranks earlier variants
/// lower, and the planner sorts cards by this key, so `Leech` sorts before
/// `DueReview`, which sorts before `New`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum CardPriority {
    /// Leech cards should be studied first when prioritize_leeches is true.
    Leech,
    /// Review cards due today.
    DueReview,
    /// New cards.
    New,
}
ankit_engine/analyze.rs

ankit_engine/
analyze.rs