use crate::cli::json_output::JsonHeader;
use regex::Regex;
use std::collections::HashSet;
use std::sync::OnceLock;
/// Returns the shared regex used to find inline `#hashtag` occurrences.
///
/// The pattern matches a `#` followed by one or more ASCII letters, digits,
/// `_` or `-`. Capture group 1 holds the tag text without the leading `#`.
/// The regex is compiled on first use and cached for later calls.
fn hashtag_regex() -> &'static Regex {
    static HASHTAG_RE: OnceLock<Regex> = OnceLock::new();
    // The pattern is a fixed literal, so compilation is not expected to fail.
    let compile = || Regex::new(r"#([a-zA-Z0-9_-]+)").unwrap();
    HASHTAG_RE.get_or_init(compile)
}
/// Returns the shared regex used to recognise a markdown header line.
///
/// The pattern is anchored at both ends and has no multi-line flag, so it is
/// meant to be applied to one line at a time. Capture group 1 holds the run
/// of one to six `#` characters; capture group 2 holds the text that follows
/// the separating whitespace. The regex is compiled on first use and cached.
fn header_regex() -> &'static Regex {
    static HEADER_RE: OnceLock<Regex> = OnceLock::new();
    // The pattern is a fixed literal, so compilation is not expected to fail.
    let compile = || Regex::new(r"^(#{1,6})\s+(.+)$").unwrap();
    HEADER_RE.get_or_init(compile)
}
pub fn extract_tags(content: &str) -> Vec<String> {
let mut tags: HashSet<String> = HashSet::new();
if let Some(frontmatter) = extract_frontmatter(content)
&& let Some(yaml_tags) = extract_frontmatter_tags(&frontmatter) {
for tag in yaml_tags {
tags.insert(tag);
}
}
for capture in hashtag_regex().captures_iter(content) {
if let Some(tag) = capture.get(1) {
tags.insert(tag.as_str().to_string());
}
}
let mut result: Vec<String> = tags.into_iter().collect();
result.sort();
result
}
/// Returns the target of every link span that
/// `kimun_core::note::link_char_spans` reports for `content`, in the order
/// that function yields them.
pub fn extract_links(content: &str) -> Vec<String> {
    let mut targets = Vec::new();
    for span in kimun_core::note::link_char_spans(content) {
        // Only the link destination is kept; the rest of the span is dropped.
        targets.push(span.target);
    }
    targets
}
/// Returns one `JsonHeader` per header line in `content`, in document order.
///
/// A header line starts with one to six `#` characters followed by
/// whitespace and some text. `level` is the number of `#` characters and
/// `text` is the remainder of the line with surrounding whitespace removed.
/// Every line is tested on its own; no markdown context (such as fenced code
/// blocks) is taken into account.
pub fn extract_headers(content: &str) -> Vec<JsonHeader> {
    let pattern = header_regex();
    content
        .lines()
        .filter_map(|line| {
            let capture = pattern.captures(line)?;
            let hashes = capture.get(1)?;
            let title = capture.get(2)?;
            Some(JsonHeader {
                text: title.as_str().trim().to_string(),
                // At most six `#` can match, so the cast cannot truncate.
                level: hashes.as_str().len() as u32,
            })
        })
        .collect()
}
/// Returns the text enclosed by the `---` fences of a front matter block at
/// the very start of `content`.
///
/// The first line must be exactly `---` (trailing whitespace is tolerated);
/// content that merely begins with three dashes, such as `----` or
/// `---title`, is not treated as an opening fence. The block ends at the
/// first following line that equals `---` once trimmed. The lines in between
/// are joined with `\n`; the fence lines themselves are not included.
///
/// Returns `None` when `content` does not start with a fence or when the
/// block is never closed. An empty block yields `Some("")`.
fn extract_frontmatter(content: &str) -> Option<String> {
    let mut lines = content.lines();

    // An empty document has no first line, hence no front matter.
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    // Scan only as far as the closing fence instead of the whole document.
    let mut body: Vec<&str> = Vec::new();
    for line in lines {
        if line.trim() == "---" {
            return Some(body.join("\n"));
        }
        body.push(line);
    }

    // Reached the end of input without a closing fence.
    None
}
/// Extracts tags from the raw text of a front matter block.
///
/// This is a small line-based scanner, not a YAML parser. Every line is
/// trimmed before it is inspected, so indentation is ignored. The recognised
/// forms are:
///
/// * `tags: [a, "b", 'c']` — inline array, split on commas;
/// * `tags: value` — a single inline value;
/// * `tags:` followed by `- item` lines — a block list, which ends at a blank
///   line or at the next line containing `:`;
/// * `tag: value` — a single tag.
///
/// In every form one surrounding pair of matching double or single quotes is
/// removed from the value, and empty values are skipped. Tags are returned in
/// the order they appear; duplicates are kept.
///
/// Returns `None` when no tag was found.
fn extract_frontmatter_tags(frontmatter: &str) -> Option<Vec<String>> {
    // Trims `raw` and removes one surrounding pair of matching quotes
    // (`"…"` or `'…'`). Unbalanced quotes are left untouched.
    fn unquote(raw: &str) -> &str {
        let raw = raw.trim();
        raw.strip_prefix('"')
            .and_then(|rest| rest.strip_suffix('"'))
            .or_else(|| raw.strip_prefix('\'').and_then(|rest| rest.strip_suffix('\'')))
            .unwrap_or(raw)
    }

    let mut tags: Vec<String> = Vec::new();
    // Single place where a raw value is cleaned up and recorded, so every
    // syntax gets the same quote handling.
    let mut push_tag = |raw: &str| {
        let tag = unquote(raw);
        if !tag.is_empty() {
            tags.push(tag.to_string());
        }
    };

    let mut in_tags_block = false;
    for line in frontmatter.lines().map(str::trim) {
        if let Some(value) = line.strip_prefix("tags:") {
            let value = value.trim();
            if let Some(items) = value.strip_prefix('[').and_then(|v| v.strip_suffix(']')) {
                // Inline array: `tags: [a, b]`.
                for item in items.split(',') {
                    push_tag(item);
                }
            } else if value.is_empty() {
                // Bare `tags:` key: the tags follow as `- item` lines.
                in_tags_block = true;
            } else {
                // Inline scalar: `tags: value`.
                push_tag(value);
            }
            continue;
        }

        // `- item` lines only count while inside a `tags:` block list.
        if let Some(item) = line.strip_prefix('-').filter(|_| in_tags_block) {
            push_tag(item);
            continue;
        }

        if let Some(value) = line.strip_prefix("tag:") {
            push_tag(value);
            continue;
        }

        // Another key or a blank line closes an open block list.
        if in_tags_block && (line.contains(':') || line.is_empty()) {
            in_tags_block = false;
        }
    }

    if tags.is_empty() { None } else { Some(tags) }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An inline `tags: [...]` array yields each quoted entry, in order, and
    /// unrelated keys are ignored.
    #[test]
    fn frontmatter_tags_array_format() {
        let parsed = extract_frontmatter_tags(
            r#"tags: ["project", "urgent"]
title: "Test Note""#,
        );
        assert_eq!(parsed.unwrap(), vec!["project", "urgent"]);
    }

    /// A singular `tag:` key contributes exactly one tag.
    #[test]
    fn frontmatter_single_tag_format() {
        let parsed = extract_frontmatter_tags(
            r#"tag: meeting
title: "Test Note""#,
        );
        assert_eq!(parsed.unwrap(), vec!["meeting"]);
    }
}