kimun_notes/cli/
metadata_extractor.rs1use crate::cli::json_output::JsonHeader;
2use regex::Regex;
3use std::collections::HashSet;
4use std::sync::OnceLock;
5
6fn hashtag_regex() -> &'static Regex {
7 static REGEX: OnceLock<Regex> = OnceLock::new();
8 REGEX.get_or_init(|| Regex::new(r"#([a-zA-Z0-9_-]+)").unwrap())
9}
10
11fn header_regex() -> &'static Regex {
12 static REGEX: OnceLock<Regex> = OnceLock::new();
13 REGEX.get_or_init(|| Regex::new(r"^(#{1,6})\s+(.+)$").unwrap())
14}
15
16pub fn extract_tags(content: &str) -> Vec<String> {
17 let mut tags: HashSet<String> = HashSet::new();
18
19 if let Some(frontmatter) = extract_frontmatter(content)
21 && let Some(yaml_tags) = extract_frontmatter_tags(&frontmatter)
22 {
23 for tag in yaml_tags {
24 tags.insert(tag);
25 }
26 }
27
28 for capture in hashtag_regex().captures_iter(content) {
30 if let Some(tag) = capture.get(1) {
31 tags.insert(tag.as_str().to_string());
32 }
33 }
34
35 let mut result: Vec<String> = tags.into_iter().collect();
36 result.sort();
37 result
38}
39
40pub fn extract_links(content: &str) -> Vec<String> {
41 kimun_core::note::link_char_spans(content)
42 .into_iter()
43 .map(|span| span.target)
44 .collect()
45}
46
47pub fn extract_headers(content: &str) -> Vec<JsonHeader> {
48 let mut headers: Vec<JsonHeader> = Vec::new();
49
50 for line in content.lines() {
51 if let Some(capture) = header_regex().captures(line)
52 && let (Some(level_match), Some(text_match)) = (capture.get(1), capture.get(2))
53 {
54 let level = level_match.as_str().len() as u32;
55 let text = text_match.as_str().trim().to_string();
56 headers.push(JsonHeader { text, level });
57 }
58 }
59
60 headers
61}
62
/// Returns the body of a leading `---`-delimited frontmatter block, if `content` has one.
///
/// The first line must be exactly `---` (trailing whitespace is ignored, leading
/// whitespace is not) and the block ends at the next line that equals `---` after
/// trimming. The delimiter lines are not part of the result; the lines between them are
/// joined with `\n`, so an empty block yields `Some(String::new())`.
///
/// Returns `None` when the first line is not a `---` delimiter or when no closing
/// delimiter follows it.
fn extract_frontmatter(content: &str) -> Option<String> {
    let mut lines = content.lines();

    // Require the whole first line to be the delimiter. A bare `starts_with("---")` would
    // also accept a thematic break such as `-----` or ordinary text such as `---draft`.
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    // Scan lazily so only the frontmatter lines are buffered, not the whole note.
    let mut body: Vec<&str> = Vec::new();
    for line in lines {
        if line.trim() == "---" {
            return Some(body.join("\n"));
        }
        body.push(line);
    }

    // Reached the end of the input without a closing delimiter.
    None
}
88
/// Normalises one raw tag value.
///
/// Trims surrounding whitespace, removes one pair of matching surrounding quotes (`"` or
/// `'`), and returns `None` when nothing is left.
fn clean_frontmatter_tag(raw: &str) -> Option<String> {
    let value = raw.trim();
    let unquoted = value
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| {
            value
                .strip_prefix('\'')
                .and_then(|rest| rest.strip_suffix('\''))
        })
        .unwrap_or(value);

    (!unquoted.is_empty()).then(|| unquoted.to_string())
}

/// Extracts tag names from the raw text of a frontmatter block.
///
/// This is a line-based scan, not a YAML parser. After trimming each line it recognises:
///
/// - `tags: [a, "b", 'c']` — an inline, comma-separated array;
/// - `tags: value` — a single value on the same line;
/// - `tags:` with nothing after it, followed by `- item` lines — a block list;
/// - `tag: value` — a single value.
///
/// Every value goes through the same cleanup: whitespace is trimmed, one pair of matching
/// surrounding quotes (`"` or `'`) is removed, and empty values are skipped. Tags are
/// returned in the order they appear.
///
/// Returns `None` when no tag was found.
fn extract_frontmatter_tags(frontmatter: &str) -> Option<Vec<String>> {
    let mut tags: Vec<String> = Vec::new();
    // True while scanning the `- item` lines that follow a bare `tags:` key.
    let mut in_tags_block = false;

    for line in frontmatter.lines() {
        let line = line.trim();

        if let Some(value) = line.strip_prefix("tags:") {
            let value = value.trim();
            if let Some(items) = value
                .strip_prefix('[')
                .and_then(|rest| rest.strip_suffix(']'))
            {
                tags.extend(items.split(',').filter_map(clean_frontmatter_tag));
            } else if value.is_empty() {
                in_tags_block = true;
            } else {
                tags.extend(clean_frontmatter_tag(value));
            }
        } else if let Some(item) = line.strip_prefix('-').filter(|_| in_tags_block) {
            // List items only count while a `tags:` block list is open.
            tags.extend(clean_frontmatter_tag(item));
        } else if let Some(value) = line.strip_prefix("tag:") {
            tags.extend(clean_frontmatter_tag(value));
        } else if in_tags_block && (line.contains(':') || line.is_empty()) {
            // Any other line that contains `:` or is blank closes the block list.
            in_tags_block = false;
        }
    }

    if tags.is_empty() { None } else { Some(tags) }
}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// An inline `tags: [...]` array yields each quoted entry, in source order.
    #[test]
    fn frontmatter_tags_array_format() {
        let input = "tags: [\"project\", \"urgent\"]\ntitle: \"Test Note\"";

        let parsed = extract_frontmatter_tags(input);

        assert_eq!(
            parsed,
            Some(vec!["project".to_string(), "urgent".to_string()])
        );
    }

    /// A singular `tag:` key yields exactly one tag.
    #[test]
    fn frontmatter_single_tag_format() {
        let input = "tag: meeting\ntitle: \"Test Note\"";

        let parsed = extract_frontmatter_tags(input);

        assert_eq!(parsed, Some(vec!["meeting".to_string()]));
    }
}