kimun_notes/cli/
metadata_extractor.rs1use crate::cli::json_output::JsonHeader;
2use regex::Regex;
3use std::collections::HashSet;
4use std::sync::OnceLock;
5
6fn header_regex() -> &'static Regex {
7 static REGEX: OnceLock<Regex> = OnceLock::new();
8 REGEX.get_or_init(|| Regex::new(r"^(#{1,6})\s+(.+)$").unwrap())
9}
10
11pub fn extract_tags(content: &str) -> Vec<String> {
12 let mut tags: HashSet<String> = HashSet::new();
13
14 if let Some(frontmatter) = extract_frontmatter(content)
16 && let Some(yaml_tags) = extract_frontmatter_tags(&frontmatter)
17 {
18 for tag in yaml_tags {
19 tags.insert(tag.to_lowercase());
20 }
21 }
22
23 for tag in kimun_core::note::extract_labels(content) {
27 tags.insert(tag);
28 }
29
30 let mut result: Vec<String> = tags.into_iter().collect();
31 result.sort();
32 result
33}
34
35pub fn extract_links(content: &str) -> Vec<String> {
36 kimun_core::note::link_char_spans(content)
37 .into_iter()
38 .map(|span| span.target)
39 .collect()
40}
41
42pub fn extract_headers(content: &str) -> Vec<JsonHeader> {
43 let mut headers: Vec<JsonHeader> = Vec::new();
44
45 for line in content.lines() {
46 if let Some(capture) = header_regex().captures(line)
47 && let (Some(level_match), Some(text_match)) = (capture.get(1), capture.get(2))
48 {
49 let level = level_match.as_str().len() as u32;
50 let text = text_match.as_str().trim().to_string();
51 headers.push(JsonHeader { text, level });
52 }
53 }
54
55 headers
56}
57
/// Returns the text between the opening and closing `---` delimiters of a
/// leading frontmatter block, or `None` when the content has no such block.
///
/// The block is recognised only when:
/// * the very first line is exactly `---` (trailing whitespace is tolerated,
///   but a longer run such as `----` or a line like `---title` is not a
///   delimiter), and
/// * a later line, once trimmed, is exactly `---`.
///
/// The returned string joins the enclosed lines with `\n` and excludes both
/// delimiter lines. Content with fewer than three lines is never treated as
/// frontmatter.
fn extract_frontmatter(content: &str) -> Option<String> {
    let mut lines = content.lines();

    // The opening delimiter must be the whole first line, mirroring the
    // exact-match rule applied to the closing delimiter below.
    let opening = lines.next()?;
    if opening.trim_end() != "---" {
        return None;
    }

    let rest: Vec<&str> = lines.collect();
    // Opening line plus at least two more lines are required.
    if rest.len() < 2 {
        return None;
    }

    let closing = rest.iter().position(|line| line.trim() == "---")?;
    Some(rest[..closing].join("\n"))
}
83
/// Trims `raw` and removes one pair of matching surrounding quotes
/// (`"..."` or `'...'`); unquoted text is returned trimmed but otherwise
/// unchanged.
fn strip_matching_quotes(raw: &str) -> &str {
    let raw = raw.trim();
    ['"', '\'']
        .into_iter()
        .find_map(|quote| raw.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(raw)
}

/// Extracts tag names from the text of a frontmatter block.
///
/// Recognised forms (each line is trimmed before matching):
/// * inline array: `tags: [a, "b", 'c']`
/// * scalar: `tags: a`
/// * block list: `tags:` followed by `- a` item lines
/// * singular key: `tag: a`
///
/// Surrounding double or single quotes are removed from every tag in every
/// form, and empty entries are dropped. A block list ends at the first
/// following line that is empty or contains `:`; items after that point are
/// not collected.
///
/// Returns `None` when no tag was found, otherwise the tags in the order
/// they appear.
fn extract_frontmatter_tags(frontmatter: &str) -> Option<Vec<String>> {
    let mut tags: Vec<String> = Vec::new();
    // True only while reading the `- item` lines that follow a bare `tags:` key.
    let mut in_tags_block = false;

    let mut push_tag = |raw: &str| {
        let tag = strip_matching_quotes(raw);
        if !tag.is_empty() {
            tags.push(tag.to_string());
        }
    };

    for line in frontmatter.lines().map(str::trim) {
        if let Some(value) = line.strip_prefix("tags:") {
            let value = value.trim();
            // A bare `tags:` opens a block list; any inline value closes one.
            in_tags_block = value.is_empty();

            let inline_array = value
                .strip_prefix('[')
                .and_then(|rest| rest.strip_suffix(']'));
            if let Some(items) = inline_array {
                for item in items.split(',') {
                    push_tag(item);
                }
            } else {
                push_tag(value);
            }
        } else if let Some(value) = line.strip_prefix("tag:") {
            // Another key terminates any open block list.
            in_tags_block = false;
            push_tag(value);
        } else if in_tags_block {
            if let Some(item) = line.strip_prefix('-') {
                push_tag(item);
            } else if line.is_empty() || line.contains(':') {
                in_tags_block = false;
            }
        }
    }

    if tags.is_empty() { None } else { Some(tags) }
}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// An inline `tags: [...]` array yields each quoted entry, in order.
    #[test]
    fn frontmatter_tags_array_format() {
        let input = r#"tags: ["project", "urgent"]
title: "Test Note""#;

        let parsed = extract_frontmatter_tags(input);
        assert_eq!(parsed.unwrap(), vec!["project", "urgent"]);
    }

    /// The singular `tag:` key contributes exactly one tag.
    #[test]
    fn frontmatter_single_tag_format() {
        let input = r#"tag: meeting
title: "Test Note""#;

        let parsed = extract_frontmatter_tags(input);
        assert_eq!(parsed.unwrap(), vec!["meeting"]);
    }

    /// Frontmatter tags and body labels are merged; labels inside a fenced
    /// code block are not reported, and a label stops at the first dash.
    #[test]
    fn extract_tags_matches_core_label_rules() {
        let note =
            "---\ntags: [yaml_tag]\n---\nplain #body and #tag-with-dash\n```\n#code_tag\n```";
        let found = extract_tags(note);
        let has = |name: &str| found.iter().any(|tag| tag == name);

        assert!(has("yaml_tag"));
        assert!(has("body"));
        assert!(has("tag"));
        assert!(!has("code_tag"), "got: {:?}", found);
        assert!(!has("tag-with-dash"), "got: {:?}", found);
    }
}