kimun_notes/cli/
metadata_extractor.rs1use crate::cli::json_output::JsonHeader;
2use regex::Regex;
3use std::collections::HashSet;
4use std::sync::OnceLock;
5
6fn hashtag_regex() -> &'static Regex {
7 static REGEX: OnceLock<Regex> = OnceLock::new();
8 REGEX.get_or_init(|| Regex::new(r"#([a-zA-Z0-9_-]+)").unwrap())
9}
10
11fn header_regex() -> &'static Regex {
12 static REGEX: OnceLock<Regex> = OnceLock::new();
13 REGEX.get_or_init(|| Regex::new(r"^(#{1,6})\s+(.+)$").unwrap())
14}
15
16pub fn extract_tags(content: &str) -> Vec<String> {
17 let mut tags: HashSet<String> = HashSet::new();
18
19 if let Some(frontmatter) = extract_frontmatter(content)
21 && let Some(yaml_tags) = extract_frontmatter_tags(&frontmatter) {
22 for tag in yaml_tags {
23 tags.insert(tag);
24 }
25 }
26
27 for capture in hashtag_regex().captures_iter(content) {
29 if let Some(tag) = capture.get(1) {
30 tags.insert(tag.as_str().to_string());
31 }
32 }
33
34 let mut result: Vec<String> = tags.into_iter().collect();
35 result.sort();
36 result
37}
38
39pub fn extract_links(content: &str) -> Vec<String> {
40 kimun_core::note::link_char_spans(content)
41 .into_iter()
42 .map(|span| span.target)
43 .collect()
44}
45
46pub fn extract_headers(content: &str) -> Vec<JsonHeader> {
47 let mut headers: Vec<JsonHeader> = Vec::new();
48
49 for line in content.lines() {
50 if let Some(capture) = header_regex().captures(line)
51 && let (Some(level_match), Some(text_match)) = (capture.get(1), capture.get(2)) {
52 let level = level_match.as_str().len() as u32;
53 let text = text_match.as_str().trim().to_string();
54 headers.push(JsonHeader { text, level });
55 }
56 }
57
58 headers
59}
60
/// Returns the text between the opening and closing `---` delimiters of a
/// frontmatter block, or `None` when `content` has no such block.
///
/// The very first line of `content` must be exactly `---` (trailing
/// whitespace is tolerated). A first line that merely starts with three
/// dashes, such as `----` or `---text`, is not a frontmatter opener. The
/// block ends at the first following line that is `---` once surrounding
/// whitespace is trimmed. If no closing delimiter exists the result is
/// `None`.
///
/// The returned string contains the lines between the delimiters joined with
/// `\n`; an empty block yields `Some(String::new())`.
fn extract_frontmatter(content: &str) -> Option<String> {
    let mut lines = content.lines();

    // An empty document has no first line; anything other than a bare `---`
    // opener means there is no frontmatter.
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    // Stream the lines so that only the frontmatter itself is buffered,
    // never the whole document.
    let mut block: Vec<&str> = Vec::new();
    for line in lines {
        if line.trim() == "---" {
            return Some(block.join("\n"));
        }
        block.push(line);
    }

    // Reached the end of the document without a closing delimiter.
    None
}
86
/// Removes one pair of matching quotes (`"…"` or `'…'`) that wraps `raw`.
///
/// Surrounding whitespace is trimmed first. A value that is not wrapped in a
/// matching pair is returned trimmed but otherwise unchanged.
fn unquote_tag(raw: &str) -> &str {
    let raw = raw.trim();
    raw.strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| raw.strip_prefix('\'').and_then(|rest| rest.strip_suffix('\'')))
        .unwrap_or(raw)
}

/// Unquotes `raw` and appends it to `tags` unless the result is empty.
fn push_clean_tag(tags: &mut Vec<String>, raw: &str) {
    let tag = unquote_tag(raw);
    if !tag.is_empty() {
        tags.push(tag.to_string());
    }
}

/// Extracts tag names from the text of a frontmatter block.
///
/// This is a line-oriented scan, not a YAML parser. Each line is trimmed and
/// the following forms are recognised:
///
/// * `tags: [a, "b", 'c']` — inline array, split on commas;
/// * `tags: value` — a single scalar tag;
/// * `tags:` followed by `- item` lines — a block list, which ends at the
///   next line that is empty or contains a `:`;
/// * `tag: value` — a single scalar tag.
///
/// In every form one pair of surrounding double or single quotes is removed
/// from each tag, and empty tags are dropped. Tags are returned in the order
/// they appear; `None` is returned when no tag was found.
fn extract_frontmatter_tags(frontmatter: &str) -> Option<Vec<String>> {
    let mut tags: Vec<String> = Vec::new();
    let mut in_tags_block = false;

    for line in frontmatter.lines() {
        let line = line.trim();

        if let Some(value) = line.strip_prefix("tags:") {
            let value = value.trim();

            if let Some(items) = value.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
                // Inline array: `tags: [a, b, c]`.
                for item in items.split(',') {
                    push_clean_tag(&mut tags, item);
                }
            } else if value.is_empty() {
                // Bare `tags:` key: the tags follow as `- item` lines.
                in_tags_block = true;
            } else {
                // Scalar: `tags: value`.
                push_clean_tag(&mut tags, value);
            }
        } else if let Some(value) = line.strip_prefix("tag:") {
            push_clean_tag(&mut tags, value);
        } else if in_tags_block {
            if let Some(item) = line.strip_prefix('-') {
                push_clean_tag(&mut tags, item);
            } else if line.contains(':') || line.is_empty() {
                // Another key or a blank line terminates the block list.
                in_tags_block = false;
            }
        }
    }

    if tags.is_empty() { None } else { Some(tags) }
}
164
165#[cfg(test)]
166mod tests {
167 use super::*;
168
169 #[test]
170 fn frontmatter_tags_array_format() {
171 let frontmatter = r#"tags: ["project", "urgent"]
172title: "Test Note""#;
173
174 let tags = extract_frontmatter_tags(frontmatter).unwrap();
175 assert_eq!(tags, vec!["project", "urgent"]);
176 }
177
178 #[test]
179 fn frontmatter_single_tag_format() {
180 let frontmatter = r#"tag: meeting
181title: "Test Note""#;
182
183 let tags = extract_frontmatter_tags(frontmatter).unwrap();
184 assert_eq!(tags, vec!["meeting"]);
185 }
186}