// kimun_core 0.2.8
//
// Core library for the Kimün notes application.
// Documentation
pub(crate) mod content_extractor;

use std::fmt::Display;

use content_extractor::{
    extract_title, get_chunks_and_links, get_content_chunks, get_content_data,
};
pub use content_extractor::{
    is_remote_url, link_char_spans, link_target_filename, target_looks_like_image,
    url_with_allowed_scheme, wikilink_char_spans, LinkSpan, LinkSpanKind,
};

use crate::nfs::VaultPath;

/// A note held in memory: the vault path it is addressed by together with
/// its unprocessed text.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NoteDetails {
    /// Vault path identifying the note.
    pub path: VaultPath,
    /// Text of the note, stored as-is with no parsing applied.
    pub raw_text: String,
}

/// Human-readable rendering: `Path: <path>, Content: <raw_text>` followed by
/// a newline.
impl Display for NoteDetails {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { path, raw_text } = self;
        // `writeln!` (not `write!`) is used, so the output ends with '\n'.
        writeln!(f, "Path: {}, Content: {}", path, raw_text)
    }
}

impl NoteDetails {
    /// Builds a note from `note_path` and `text`.
    ///
    /// The stored path is whatever `note_path.flatten()` returns; the text is
    /// copied into an owned `String`.
    pub fn new<S: AsRef<str>>(note_path: &VaultPath, text: S) -> Self {
        let path = note_path.flatten();
        let raw_text = text.as_ref().to_owned();
        Self { path, raw_text }
    }

    /// Extracts a title from arbitrary note text by delegating to
    /// `extract_title`; no `NoteDetails` instance is required.
    pub fn get_title_from_text<S: AsRef<str>>(text: S) -> String {
        extract_title(text)
    }

    /// Title of this note, extracted from its raw text.
    pub fn get_title(&self) -> String {
        extract_title(&self.raw_text)
    }

    /// Computes the [`NoteContentData`] for this note's raw text.
    pub fn get_content_data(&self) -> NoteContentData {
        let text = &self.raw_text;
        get_content_data(text)
    }

    /// Splits this note's raw text into [`ContentChunk`]s.
    pub fn get_content_chunks(&self) -> Vec<ContentChunk> {
        let text = &self.raw_text;
        get_content_chunks(text)
    }

    /// Returns both the content chunks and the links found in this note.
    /// The note's own path is handed to the extractor along with the text.
    pub fn get_chunks_and_links(&self) -> (Vec<ContentChunk>, Vec<NoteLink>) {
        let Self { path, raw_text } = self;
        get_chunks_and_links(path, raw_text)
    }
}

/// Markdown text paired with the list of links that belong to it.
#[derive(Debug, Clone, PartialEq)]
pub struct MarkdownNote {
    /// The markdown text.
    pub text: String,
    /// Links associated with `text`.
    pub links: Vec<NoteLink>,
}

/// Basic data extracted from a note, used for comparison and for search in
/// the DB.
///
/// Extracting it is expensive, so it should not be recomputed every time the
/// content changes. Prefer computing it lazily when it is actually needed and
/// caching the result somewhere (such as the DB) for search and access.
#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize)]
pub struct NoteContentData {
    /// Title extracted from the note.
    pub title: String,
    /// Hash value associated with the note content.
    pub hash: u64,
}

impl NoteContentData {
    pub fn new(title: String, hash: u64) -> Self {
        Self { title, hash }
    }
}

/// Human-readable rendering: `Title: <title>, Hash: <hash>` (no trailing
/// newline).
impl Display for NoteContentData {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { title, hash } = self;
        write!(f, "Title: {}, Hash: {}", title, hash)
    }
}

/// Separator used to flatten the heading hierarchy of a chunk into the single
/// `breadcrumb` string stored in the FTS column and in memory.
///
/// Uses the ASCII Unit Separator control character (U+001F, written `"\x1f"`)
/// so heading text containing visible punctuation — including `>`, `/`, `|`,
/// `:` — round-trips correctly through `breadcrumb_parts()` /
/// `breadcrumb_last()`.
///
/// This is not the `>` used as the search query operator in
/// `db::search_terms`.
pub const BREADCRUMB_SEP: &str = "\x1f";

/// A piece of note content together with the heading trail it sits under.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ContentChunk {
    /// Heading hierarchy flattened into one string, components joined by
    /// `BREADCRUMB_SEP`.
    pub breadcrumb: String,
    /// Text of the chunk.
    pub text: String,
}

impl ContentChunk {
    /// The flattened breadcrumb string, separators included.
    pub fn get_breadcrumb(&self) -> &str {
        self.breadcrumb.as_str()
    }

    /// Walks the heading components from the outermost to the innermost.
    ///
    /// Empty components are skipped, so an empty breadcrumb produces an empty
    /// iterator.
    pub fn breadcrumb_parts(&self) -> impl Iterator<Item = &str> {
        let components = self.breadcrumb.split(BREADCRUMB_SEP);
        components.filter(|component| !component.is_empty())
    }

    /// The innermost (last) non-empty heading of the breadcrumb, or `None`
    /// when there is none.
    ///
    /// Walks the components from the end and stops at the first non-empty
    /// one, so the front of the breadcrumb is not scanned.
    pub fn breadcrumb_last(&self) -> Option<&str> {
        for component in self.breadcrumb.rsplit(BREADCRUMB_SEP) {
            if !component.is_empty() {
                return Some(component);
            }
        }
        None
    }

    /// The text of the chunk.
    pub fn get_text(&self) -> &str {
        self.text.as_str()
    }
}

/// The kind of target a [`NoteLink`] points at.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LinkType {
    /// Link to a note, carrying the note's vault path.
    Note(VaultPath),
    /// Link to a vault path that is not a note.
    Attachment(VaultPath),
    /// Image link with its resolved path.
    /// For vault images: absolute OS path (e.g. `/home/user/vault/images/photo.png`).
    /// For external images: the original URL.
    Image(String),
    /// Link to a URL; carries no payload of its own.
    Url,
    /// Hashtag; carries no payload of its own.
    Hashtag,
}

/// A link found in a note: what it points at, its display text, and the link
/// as originally written.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NoteLink {
    /// Kind of link, including any resolved target.
    pub ltype: LinkType,
    /// Text attached to the link.
    pub text: String,
    /// The link exactly as it comes from the note, with no cleanup applied.
    ///
    /// For note links it may therefore contain invalid characters and
    /// uppercase letters; those are cleaned up when the link is converted to
    /// a path.
    pub raw_link: String,
}

impl NoteLink {
    /// Link to a vault path, classified by `path.is_note()`: a
    /// [`LinkType::Note`] when it returns `true`, otherwise a
    /// [`LinkType::Attachment`]. `raw_link` is the path rendered as a string.
    pub fn vault_path<S: AsRef<str>>(path: &VaultPath, text: S) -> Self {
        if path.is_note() {
            return Self::note(path, text);
        }
        Self {
            ltype: LinkType::Attachment(path.to_owned()),
            text: String::from(text.as_ref()),
            raw_link: path.to_string(),
        }
    }

    /// Link to a note. The path is always wrapped in [`LinkType::Note`]
    /// without inspecting it; `raw_link` is the path rendered as a string.
    pub fn note<S: AsRef<str>>(path: &VaultPath, text: S) -> Self {
        Self {
            ltype: LinkType::Note(path.to_owned()),
            text: String::from(text.as_ref()),
            raw_link: path.to_string(),
        }
    }

    /// URL link. The URL itself is kept in `raw_link`, since
    /// [`LinkType::Url`] carries no payload.
    pub fn url<S: AsRef<str>, T: AsRef<str>>(url: S, text: T) -> Self {
        Self {
            ltype: LinkType::Url,
            text: String::from(text.as_ref()),
            raw_link: String::from(url.as_ref()),
        }
    }

    /// Hashtag link. `tag` is stored as the text, and `raw_link` is `tag`
    /// prefixed with `#`.
    pub fn hashtag<S: AsRef<str>>(tag: S) -> Self {
        let tag = tag.as_ref();
        Self {
            ltype: LinkType::Hashtag,
            text: tag.to_owned(),
            raw_link: format!("#{}", tag),
        }
    }

    /// Image link.
    /// `resolved_path`: absolute OS path for vault images, original URL for external images.
    /// `alt_text`: the alt text from the markdown `![alt_text](...)`.
    /// `raw_link`: the original path/URL as written in the note.
    pub fn image<S: AsRef<str>, T: AsRef<str>, U: AsRef<str>>(
        resolved_path: S,
        alt_text: T,
        raw_link: U,
    ) -> Self {
        let resolved = String::from(resolved_path.as_ref());
        Self {
            ltype: LinkType::Image(resolved),
            text: String::from(alt_text.as_ref()),
            raw_link: String::from(raw_link.as_ref()),
        }
    }
}