ankify 0.1.1

Generate and sync Anki flashcards from your Typst documents.
Documentation
//! Cache module.
//!
//! This module exists to support two main design goals:
//!
//! 1.  API calls to AnkiConnect are (very) expensive, so we want to keep them
//!     to a minimum.
//! 2.  At the same time, we want the calls we do make to succeed.
//!
//! To aid with the 1st goal, we stipulate that the contents of Typst files
//! syncing to Anki take precedence over the contents of Anki's database. So,
//! for example, if a user modifies a note in Anki that was generated by Ankify,
//! those changes may be overwritten the next time Ankify syncs that note. This
//! principle implies that we can keep track of the contents of Ankify-generated
//! notes in the Anki database by keeping track of the last content that we
//! pushed to them. We do this as follows:
//!
//! -   We track the contents of each field of each note we pushed.
//! -   For Png contents, this tracking is done by hashing the rendered
//!     media file and storing the hash in the cache.
//! -   For Plain and Svg contents (Svg is inlined in the field value), we
//!     hash the field text directly.
//!
//! For each note, we also track tags, deck, model, and the ID that Anki
//! assigned to the note. This will help us address the 2nd design goal, as
//! we'll see in the `sync` module.
//!
//! This module is meant to specifically serve the `sync` module, which is
//! responsible for synchronizing notes between Typst and Anki. The `sync`
//! module may ask the `cache` module to
//!
//! -   read the cache, so that the `sync` module can determine which
//!     AnkiConnect requests to make, and to
//! -   update the cache, after the `sync` module has received a response from
//!     AnkiConnect indicating that a request was successful. To update the
//!     cache, the `cache` module will receive notes in the form of `Note`
//!     structs, as defined in the `metadata` module.

use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256 as Sha256Hasher};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use tokio::fs;

use crate::ankiconnect::{Deck, Field, Model, Note as AnkiNote, NoteId, Tag};
use crate::compile::Format;
use crate::error::{Error, Result};
use crate::metadata::CompletedNote;
use futures;

/// A SHA-256 digest represented as a hexadecimal string.
///
/// Values produced by [`Sha256::from_bytes`] and [`Sha256::from_text`] are
/// the lowercase-hex rendering of the digest. The wrapped string is private,
/// so outside this module a value can only come from those constructors or
/// from deserialization (which accepts any string and does not validate it).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Sha256(String);

impl Sha256 {
    /// Borrow the hexadecimal digest text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Hash `data` with SHA-256 and keep the digest as lowercase hex.
    pub fn from_bytes(data: &[u8]) -> Self {
        // Feed the hasher incrementally; for a single slice this yields the
        // same digest as the one-shot `digest` call.
        let mut hasher = Sha256Hasher::new();
        hasher.update(data);
        let digest = hasher.finalize();
        Self(format!("{:x}", digest))
    }

    /// Hash the UTF-8 bytes of `text`.
    pub fn from_text(text: &str) -> Self {
        let bytes = text.as_bytes();
        Self::from_bytes(bytes)
    }
}

/// Label of a note, as parsed from a Typst file's metadata.
///
/// A thin newtype over `String`: [`Label::new`] and the `From<String>` impl
/// wrap the text as-is, without validation or normalization.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Label(String);

impl Label {
    /// Create a new label.
    pub fn new(label: String) -> Self {
        Self(label)
    }

    /// Get the label as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl From<String> for Label {
    fn from(s: String) -> Self {
        Self(s)
    }
}

/// A cache entry representing the last known state of a note in Anki.
///
/// One entry is stored per note label in [`Cache`]. The entry is serialized
/// as part of the cache file, so field names are part of the on-disk format.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheEntry {
    /// The label of the note, as parsed from the Typst file's metadata.
    pub label: Label,

    /// The numeric `id` that Anki assigned to the note (for new notes,
    /// this will first be known when AnkiConnect responds to the addNotes request)
    pub id: NoteId,

    /// The tags associated with this note.
    pub tags: Vec<Tag>,

    /// Map from field name to the hash of the content last pushed to that
    /// field. As produced by [`Cache::create_field_hashes`], a PNG field with
    /// a known media file is hashed from that file's bytes and every other
    /// field from its text value; `None` means no value was available to hash.
    pub hash: HashMap<Field, Option<Sha256>>,

    /// The Anki deck to which this note belongs.
    pub deck: Deck,

    /// The Anki model that this note uses.
    pub model: Model,
}

impl CacheEntry {
    /// Look up the stored hash for `field`.
    ///
    /// Returns `None` both when the field is not tracked and when it is
    /// tracked without a hash.
    pub fn get_field_hash(&self, field: &Field) -> Option<&Sha256> {
        let stored = self.hash.get(field)?;
        stored.as_ref()
    }
}

/// The main cache structure that manages cached note data.
///
/// Only `entries` is persisted; the backing file path is runtime state that
/// [`Cache::load_from_file`] and [`Cache::with_file`] fill in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Cache {
    /// Map from note label to cache entry.
    ///
    /// Flattened by serde, so the serialized cache is a single JSON object
    /// whose keys are the note labels.
    #[serde(flatten)]
    entries: HashMap<String, CacheEntry>,

    /// Path to the cache file.
    ///
    /// Skipped by serde: it is never written to the file and deserializes to
    /// `None`. [`Cache::save`] fails when it is `None`.
    #[serde(skip)]
    cache_file: Option<PathBuf>,
}

impl Cache {
    /// Build a cache with no entries and no backing file.
    ///
    /// Calling [`Cache::save`] on such a cache fails, because there is no
    /// path to write to.
    pub fn new() -> Self {
        let entries = HashMap::new();
        let cache_file = None;
        Self {
            entries,
            cache_file,
        }
    }

    /// Create a new cache with a specific cache file path.
    pub fn with_file<P: AsRef<Path>>(cache_file: P) -> Self {
        Self {
            entries: HashMap::new(),
            cache_file: Some(cache_file.as_ref().to_path_buf()),
        }
    }

    /// Load the cache stored at `cache_file`.
    ///
    /// A missing file is not an error: it yields an empty cache that remembers
    /// `cache_file`, so a later [`Cache::save`] creates it.
    ///
    /// # Errors
    ///
    /// Returns a cache error if the file exists but cannot be read (for
    /// example because of missing permissions), or if its contents cannot be
    /// parsed as cache JSON.
    pub async fn load_from_file<P: AsRef<Path>>(cache_file: P) -> Result<Self> {
        let path = cache_file.as_ref();

        // Read first and classify the failure afterwards, rather than probing
        // with `Path::exists()` beforehand. The probe is a blocking call inside
        // an async fn, races with the read that follows, and returns `false`
        // for *any* metadata error, which would silently replace an unreadable
        // cache with an empty one.
        let content = match fs::read_to_string(path).await {
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                return Ok(Self::with_file(path));
            }
            outcome => outcome.map_err(|e| {
                Error::cache(format!(
                    "Failed to read cache file '{}': {}",
                    path.display(),
                    e
                ))
            })?,
        };

        let mut cache: Cache = serde_json::from_str(&content).map_err(|e| {
            Error::cache(format!(
                "Failed to parse cache file '{}': {}",
                path.display(),
                e
            ))
        })?;

        // `cache_file` is skipped during (de)serialization, so restore it here.
        cache.cache_file = Some(path.to_path_buf());
        Ok(cache)
    }

    /// Write the cache as pretty-printed JSON to its backing file.
    ///
    /// Missing parent directories are created first.
    ///
    /// # Errors
    ///
    /// Returns a cache error if no backing file is set, if the parent
    /// directory cannot be created, if serialization fails, or if the file
    /// cannot be written.
    pub async fn save(&self) -> Result<()> {
        let target = self
            .cache_file
            .as_deref()
            .ok_or_else(|| Error::cache("No cache file path specified"))?;

        // Make sure the directory that will hold the file exists.
        if let Some(dir) = target.parent() {
            let created = fs::create_dir_all(dir).await;
            created.map_err(|e| {
                Error::cache(format!(
                    "Failed to create cache directory '{}': {}",
                    dir.display(),
                    e
                ))
            })?;
        }

        let json = serde_json::to_string_pretty(self)
            .map_err(|e| Error::cache(format!("Failed to serialize cache: {}", e)))?;

        let written = fs::write(target, json).await;
        written.map_err(|e| {
            Error::cache(format!(
                "Failed to write cache file '{}': {}",
                target.display(),
                e
            ))
        })?;

        Ok(())
    }

    /// Look up the cache entry stored under `label`, if any.
    pub fn get(&self, label: &str) -> Option<&CacheEntry> {
        self.entries
            .get_key_value(label)
            .map(|(_, entry)| entry)
    }

    /// Store `entry` under `label`, replacing any entry already there.
    pub fn insert(&mut self, label: String, entry: CacheEntry) {
        // The previous entry, if any, is intentionally discarded.
        let _previous = self.entries.insert(label, entry);
    }

    /// Take the entry stored under `label` out of the cache.
    ///
    /// Returns `None` when no entry exists for that label.
    pub fn remove(&mut self, label: &str) -> Option<CacheEntry> {
        self.entries
            .remove_entry(label)
            .map(|(_, entry)| entry)
    }

    /// Report whether an entry is stored under `label`.
    pub fn contains(&self, label: &str) -> bool {
        self.entries.get(label).is_some()
    }

    /// Get all entries in the cache.
    pub fn entries(&self) -> &HashMap<String, CacheEntry> {
        &self.entries
    }

    /// Record the state of `note` in the cache under the note's label.
    ///
    /// Any existing entry for that label is replaced. The entry stores
    /// `note_id`, `field_hashes`, and the note's tags, deck and model. This is
    /// typically called after a successful sync operation.
    ///
    /// The in-memory cache is updated only; nothing is written to disk here.
    /// The current implementation always returns `Ok(())`.
    pub async fn update_from_note(
        &mut self,
        note: &CompletedNote,
        note_id: NoteId,
        field_hashes: HashMap<Field, Option<Sha256>>,
    ) -> Result<()> {
        let key = note.label.clone();

        let entry = CacheEntry {
            label: Label::new(key.clone()),
            id: note_id,
            tags: note.tags.iter().map(|tag| Tag::new(tag.clone())).collect(),
            hash: field_hashes,
            deck: Deck::new(note.deck.clone()),
            model: Model::new(note.model.clone()),
        };

        self.entries.insert(key, entry);
        Ok(())
    }

    /// Return the Anki note ID cached for `label`, or `None` if the label has
    /// no entry.
    pub fn get_note_id(&self, label: &str) -> Option<NoteId> {
        let entry = self.entries.get(label)?;
        Some(entry.id.clone())
    }

    /// Compute one content hash per field of `note_metadata`, for change
    /// detection.
    ///
    /// A field whose format is PNG, and for which `anki_note` lists a picture
    /// with a path, is hashed from the bytes of that file. Every other field
    /// (plain text, inline SVG, or PNG without a usable picture) is hashed
    /// from its text value in `anki_note.fields`, or mapped to `None` when no
    /// such value is present.
    ///
    /// Media files are read concurrently after all fields have been
    /// classified.
    ///
    /// # Errors
    ///
    /// Fails if a field's format cannot be parsed or if a media file cannot
    /// be read.
    pub async fn create_field_hashes(
        &self,
        anki_note: &AnkiNote,
        note_metadata: &CompletedNote,
    ) -> Result<HashMap<Field, Option<Sha256>>> {
        let mut hashes: HashMap<Field, Option<Sha256>> = HashMap::new();
        let mut pending_reads = Vec::new();

        for (name, content) in &note_metadata.data {
            let key = Field::new(name.clone());

            // Only PNG content lives in a separate rendered file; plain text
            // and inline SVG travel in the field value itself.
            let is_png = Format::parse(content.format.as_str())? == Format::Png;
            let rendered_file = if is_png {
                match anki_note.picture.as_ref() {
                    // Take the first picture targeting this field; if it has
                    // no path, later pictures are deliberately not consulted.
                    Some(pictures) => pictures
                        .iter()
                        .find(|media| {
                            media.fields.as_ref().is_some_and(|targets| {
                                targets.iter().any(|target| target.as_str() == name)
                            })
                        })
                        .and_then(|media| media.path.clone()),
                    None => None,
                }
            } else {
                None
            };

            // File-backed field: defer the read so all files load concurrently.
            if let Some(file) = rendered_file {
                let file = PathBuf::from(file);
                pending_reads.push(async move {
                    let bytes = fs::read(&file).await.map_err(|e| {
                        Error::cache(format!(
                            "Failed to read media file '{}': {}",
                            file.display(),
                            e
                        ))
                    })?;
                    Ok::<(Field, Option<Sha256>), Error>((
                        key,
                        Some(Sha256::from_bytes(&bytes)),
                    ))
                });
                continue;
            }

            // Text-backed field: hash the value carried by the Anki note.
            let text_hash = anki_note
                .fields
                .get(&key)
                .and_then(|value| value.0.as_ref())
                .map(|text| Sha256::from_text(text.as_str()));
            hashes.insert(key, text_hash);
        }

        let completed = futures::future::join_all(pending_reads).await;
        for outcome in completed {
            let (key, digest) = outcome?;
            hashes.insert(key, digest);
        }

        Ok(hashes)
    }
}

impl Default for Cache {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same text twice yields equal digests.
    #[test]
    fn sha256_is_deterministic() {
        let first = Sha256::from_text("hello");
        let second = Sha256::from_text("hello");
        assert_eq!(first, second);
    }

    /// Distinct inputs yield distinct digests.
    #[test]
    fn sha256_differs_for_different_input() {
        let of_a = Sha256::from_text("a");
        let of_b = Sha256::from_text("b");
        assert_ne!(of_a, of_b);
    }

    /// Hashing text is the same as hashing its UTF-8 bytes.
    #[test]
    fn sha256_from_text_matches_from_bytes() {
        let via_text = Sha256::from_text("x");
        let via_bytes = Sha256::from_bytes(b"x");
        assert_eq!(via_text, via_bytes);
    }
}