pub(crate) mod content_extractor;
use std::fmt::Display;
pub(crate) use content_extractor::{process_image_links, replace_note_links};
use crate::nfs::VaultPath;
/// Public facade over the scanning helpers of the crate-private
/// `content_extractor` module, plus a label-match iterator.
pub mod scan {
// Re-exported so callers can reach these items as `scan::…` even though
// `content_extractor` itself is only `pub(crate)`.
pub use super::content_extractor::{
is_inside_code_link_or_frontmatter, is_inside_exclusion_zone, is_remote_url,
link_char_spans, link_target_filename, target_looks_like_image, url_with_allowed_scheme,
wikilink_char_spans, ExclusionZones, LinkSpan, LinkSpanKind,
};
/// One label occurrence reported by [`label_matches`].
///
/// The value is `Copy`; `name` is a borrowed string slice with lifetime `'a`,
/// which `label_matches` ties to the scanned text.
#[derive(Debug, Clone, Copy)]
pub struct LabelMatch<'a> {
/// Start of the match.
/// NOTE(review): by its name a byte offset into the scanned text — confirm
/// against `content_extractor::label_matches_inner`.
pub byte_start: usize,
/// End of the match.
/// NOTE(review): presumably an exclusive byte offset — confirm.
pub byte_end: usize,
/// Name of the matched label.
/// NOTE(review): whether a leading `#` is included is not visible here — confirm.
pub name: &'a str,
}
/// Returns an iterator over the label matches found in `text`.
///
/// This is a thin wrapper: all of the work is done by
/// `content_extractor::label_matches_inner`. The returned iterator borrows
/// from `text`.
pub fn label_matches(text: &str) -> impl Iterator<Item = LabelMatch<'_>> + '_ {
super::content_extractor::label_matches_inner(text)
}
}
/// Collects the distinct hashtag labels found in `text`.
///
/// The text is handed to `content_extractor::get_markdown_and_links` together
/// with the vault root path; of the links it reports, only those typed
/// `LinkType::Hashtag` are kept. Each label is lowercased, duplicates are
/// dropped, and the result comes back in ascending order (the iteration order
/// of a `BTreeSet<String>`).
pub fn extract_labels(text: &str) -> Vec<String> {
    let root = crate::nfs::VaultPath::root();
    let (_markdown, found_links) = content_extractor::get_markdown_and_links(&root, text);

    // The ordered set handles both de-duplication and sorting.
    let unique_labels: std::collections::BTreeSet<String> = found_links
        .into_iter()
        .filter(|link| matches!(link.ltype, LinkType::Hashtag))
        .map(|link| link.text.to_lowercase())
        .collect();

    unique_labels.into_iter().collect()
}
/// A note's vault path paired with its raw text.
///
/// Title, content data, chunks and links are not stored; the methods on this
/// type compute them from `raw_text` each time they are called.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct NoteDetails {
/// Path of the note in the vault (`NoteDetails::new` stores the result of
/// `VaultPath::flatten`).
pub path: VaultPath,
/// The note text as supplied by the caller.
pub raw_text: String,
}
impl Display for NoteDetails {
    /// Writes `Path: <path>, Content: <raw_text>` followed by a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { path, raw_text } = self;
        // `writeln!` is intentional here: the rendered form ends with '\n'.
        writeln!(f, "Path: {}, Content: {}", path, raw_text)
    }
}
impl NoteDetails {
    /// Builds a `NoteDetails` from a path and any string-like text.
    ///
    /// The stored path is `note_path.flatten()`; the text is copied into an
    /// owned `String`.
    pub fn new<S: AsRef<str>>(note_path: &VaultPath, text: S) -> Self {
        let path = note_path.flatten();
        let raw_text = String::from(text.as_ref());
        Self { path, raw_text }
    }

    /// Returns the title that `content_extractor::extract_title` derives from `text`.
    pub fn get_title_from_text<S: AsRef<str>>(text: S) -> String {
        content_extractor::extract_title(text)
    }

    /// Returns the `NoteContentData` that `content_extractor::get_content_data`
    /// computes for `text`.
    pub fn content_data_of<S: AsRef<str>>(text: S) -> NoteContentData {
        content_extractor::get_content_data(text)
    }

    /// Returns the content chunks that `content_extractor::get_content_chunks`
    /// produces for `text`.
    pub fn content_chunks_of<S: AsRef<str>>(text: S) -> Vec<ContentChunk> {
        content_extractor::get_content_chunks(text)
    }

    /// Returns the chunks and links that `content_extractor::get_chunks_and_links`
    /// produces for `text`, with `path` passed along to the extractor.
    pub fn chunks_and_links_of<S: AsRef<str>>(
        path: &VaultPath,
        text: S,
    ) -> (Vec<ContentChunk>, Vec<NoteLink>) {
        content_extractor::get_chunks_and_links(path, text)
    }

    /// Title of this note; same as `get_title_from_text` applied to `raw_text`.
    pub fn get_title(&self) -> String {
        let text = &self.raw_text;
        Self::get_title_from_text(text)
    }

    /// Content data of this note; same as `content_data_of` applied to `raw_text`.
    pub fn get_content_data(&self) -> NoteContentData {
        let text = &self.raw_text;
        Self::content_data_of(text)
    }

    /// Content chunks of this note; same as `content_chunks_of` applied to `raw_text`.
    pub fn get_content_chunks(&self) -> Vec<ContentChunk> {
        let text = &self.raw_text;
        Self::content_chunks_of(text)
    }

    /// Chunks and links of this note; same as `chunks_and_links_of` applied to
    /// this note's `path` and `raw_text`.
    pub fn get_chunks_and_links(&self) -> (Vec<ContentChunk>, Vec<NoteLink>) {
        let Self { path, raw_text } = self;
        Self::chunks_and_links_of(path, raw_text)
    }

    /// Returns the markdown string and links that
    /// `content_extractor::get_markdown_and_links` produces for this note's
    /// `path` and `raw_text`.
    pub fn get_markdown_and_links(&self) -> (String, Vec<NoteLink>) {
        let Self { path, raw_text } = self;
        content_extractor::get_markdown_and_links(path, raw_text)
    }
}
/// Markdown text bundled with a list of links.
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// type; presumably it mirrors the `(String, Vec<NoteLink>)` pair returned by
/// `NoteDetails::get_markdown_and_links` — confirm against its users.
#[derive(Clone, Debug, PartialEq)]
pub struct MarkdownNote {
/// The markdown text.
pub text: String,
/// Links associated with `text`.
pub links: Vec<NoteLink>,
}
/// Summary of a note: a title and a 64-bit hash.
///
/// Derives `serde::Serialize`, so it can be serialized with serde.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
pub struct NoteContentData {
/// Title of the note.
pub title: String,
/// 64-bit hash value.
/// NOTE(review): presumably a hash of the note text computed by
/// `content_extractor::get_content_data` — confirm what input and hasher are used.
pub hash: u64,
}
impl NoteContentData {
pub fn new(title: String, hash: u64) -> Self {
Self { title, hash }
}
}
impl Display for NoteContentData {
    /// Writes `Title: <title>, Hash: <hash>` with no trailing newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { title, hash } = self;
        write!(f, "Title: {}, Hash: {}", title, hash)
    }
}
/// Separator between the segments of a [`ContentChunk`] breadcrumb: the single
/// character U+001F.
pub const BREADCRUMB_SEP: &str = "\x1f";

/// A piece of text together with the breadcrumb string attached to it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentChunk {
    /// Breadcrumb segments, delimited by [`BREADCRUMB_SEP`].
    pub breadcrumb: String,
    /// Text of the chunk.
    pub text: String,
}

impl ContentChunk {
    /// Returns the breadcrumb exactly as stored, separators included.
    pub fn get_breadcrumb(&self) -> &str {
        self.breadcrumb.as_str()
    }

    /// Iterates over the breadcrumb segments from first to last.
    ///
    /// Empty segments (from leading, trailing or doubled separators, or from
    /// an empty breadcrumb) are skipped.
    pub fn breadcrumb_parts(&self) -> impl Iterator<Item = &str> {
        let segments = self.breadcrumb.split(BREADCRUMB_SEP);
        segments.filter(|segment| !segment.is_empty())
    }

    /// Returns the last non-empty breadcrumb segment, or `None` when there is none.
    pub fn breadcrumb_last(&self) -> Option<&str> {
        // Scan from the right so the search stops at the first hit.
        let mut from_the_end = self.breadcrumb.rsplit(BREADCRUMB_SEP);
        from_the_end.find(|segment| !segment.is_empty())
    }

    /// Returns the chunk text.
    pub fn get_text(&self) -> &str {
        self.text.as_str()
    }
}
/// The kind of target a `NoteLink` points at.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinkType {
/// A vault path that is a note (`NoteLink::vault_path` picks this variant
/// when `VaultPath::is_note` returns true).
Note(VaultPath),
/// A vault path that is not a note.
Attachment(VaultPath),
/// An image; the payload is the resolved path string passed to `NoteLink::image`.
Image(String),
/// A URL; the address itself is kept in `NoteLink::raw_link`.
Url,
/// A hashtag; the tag itself is kept in `NoteLink::text`.
Hashtag,
}
/// A link found in a note: its kind, its text, and its raw target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NoteLink {
/// Kind of link, carrying the vault path or image path where applicable.
pub ltype: LinkType,
/// Text of the link: the caller-supplied text for paths and URLs, the tag
/// for hashtags, the alt text for images.
pub text: String,
/// Raw form of the link: the path's string form for vault paths, the URL
/// for URLs, `#` plus the tag for hashtags, and the caller-supplied raw
/// link for images.
pub raw_link: String,
}
impl NoteLink {
    /// Builds a link to a vault path, choosing the variant from the path itself.
    ///
    /// When `path.is_note()` is true the result is a `LinkType::Note` link,
    /// otherwise a `LinkType::Attachment` link. In both cases `raw_link` is
    /// the path's `to_string()` form.
    pub fn vault_path<S: AsRef<str>>(path: &VaultPath, text: S) -> Self {
        if path.is_note() {
            // Identical to what `note` builds, so reuse it.
            return Self::note(path, text);
        }
        Self {
            ltype: LinkType::Attachment(path.to_owned()),
            text: String::from(text.as_ref()),
            raw_link: path.to_string(),
        }
    }

    /// Builds a `LinkType::Note` link to `path` without checking `is_note`.
    ///
    /// `raw_link` is the path's `to_string()` form.
    pub fn note<S: AsRef<str>>(path: &VaultPath, text: S) -> Self {
        let ltype = LinkType::Note(path.to_owned());
        let text = String::from(text.as_ref());
        let raw_link = path.to_string();
        Self {
            ltype,
            text,
            raw_link,
        }
    }

    /// Builds a `LinkType::Url` link with `url` as the raw link and `text` as
    /// the link text.
    pub fn url<S: AsRef<str>, T: AsRef<str>>(url: S, text: T) -> Self {
        let text = String::from(text.as_ref());
        let raw_link = String::from(url.as_ref());
        Self {
            ltype: LinkType::Url,
            text,
            raw_link,
        }
    }

    /// Builds a `LinkType::Hashtag` link.
    ///
    /// `text` is `tag` as given; `raw_link` is `tag` prefixed with `#`.
    pub fn hashtag<S: AsRef<str>>(tag: S) -> Self {
        let label = tag.as_ref();
        Self {
            ltype: LinkType::Hashtag,
            text: label.to_string(),
            raw_link: format!("#{}", label),
        }
    }

    /// Builds a `LinkType::Image` link.
    ///
    /// `resolved_path` becomes the variant payload, `alt_text` the link text,
    /// and `raw_link` is stored as given.
    pub fn image<S: AsRef<str>, T: AsRef<str>, U: AsRef<str>>(
        resolved_path: S,
        alt_text: T,
        raw_link: U,
    ) -> Self {
        let ltype = LinkType::Image(String::from(resolved_path.as_ref()));
        let text = String::from(alt_text.as_ref());
        let raw_link = String::from(raw_link.as_ref());
        Self {
            ltype,
            text,
            raw_link,
        }
    }
}