1use std::path::Path;
4use std::sync::LazyLock;
5
6use regex::Regex;
7
8use crate::frontmatter::{self, Frontmatter};
9use crate::index::types::{LinkType, NoteType};
10
11#[derive(Debug, Clone)]
13pub struct ExtractedNote {
14 pub title: String,
16 pub note_type: NoteType,
18 pub frontmatter_json: Option<String>,
20 pub links: Vec<ExtractedLink>,
22}
23
24#[derive(Debug, Clone)]
26pub struct ExtractedLink {
27 pub target: String,
29 pub text: Option<String>,
31 pub link_type: LinkType,
33 pub line_number: u32,
35 pub context: Option<String>,
37}
38
39static WIKILINK_RE: LazyLock<Regex> = LazyLock::new(|| {
41 Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap()
44});
45
46static MARKDOWN_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
47 Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap()
50});
51
52pub fn extract_note(content: &str, file_path: &Path) -> ExtractedNote {
54 let parsed = frontmatter::parse(content).unwrap_or_else(|_| {
56 crate::frontmatter::ParsedDocument {
57 frontmatter: None,
58 body: content.to_string(),
59 }
60 });
61
62 let note_type = parsed
64 .frontmatter
65 .as_ref()
66 .and_then(|fm| fm.fields.get("type"))
67 .and_then(|v| v.as_str())
68 .map(|s| s.parse().unwrap_or_default())
69 .unwrap_or_default();
70
71 let title = extract_title(&parsed.frontmatter, &parsed.body, file_path);
73
74 let frontmatter_json = parsed
76 .frontmatter
77 .as_ref()
78 .map(|fm| serde_json::to_string(&fm.fields).unwrap_or_default());
79
80 let mut links = extract_links(&parsed.body);
82
83 let fm_links = extract_frontmatter_links(&parsed.frontmatter);
85 links.extend(fm_links);
86
87 ExtractedNote { title, note_type, frontmatter_json, links }
88}
89
90fn extract_title(fm: &Option<Frontmatter>, body: &str, file_path: &Path) -> String {
91 if let Some(fm) = fm
93 && let Some(title) = fm.fields.get("title").and_then(|v| v.as_str())
94 {
95 return title.to_string();
96 }
97
98 for line in body.lines() {
100 let trimmed = line.trim();
101 if let Some(heading) = trimmed.strip_prefix('#') {
102 let heading = heading.trim_start_matches('#').trim();
103 if !heading.is_empty() {
104 return heading.to_string();
105 }
106 }
107 }
108
109 file_path.file_stem().and_then(|s| s.to_str()).unwrap_or("Untitled").to_string()
111}
112
113fn extract_links(body: &str) -> Vec<ExtractedLink> {
114 let mut links = Vec::new();
115
116 for (line_num, line) in body.lines().enumerate() {
117 let line_number = (line_num + 1) as u32;
118
119 for cap in WIKILINK_RE.captures_iter(line) {
121 let target = cap.get(1).map(|m| m.as_str()).unwrap_or("");
122 let alias = cap.get(2).map(|m| m.as_str().to_string());
123
124 links.push(ExtractedLink {
125 target: target.to_string(),
126 text: alias,
127 link_type: LinkType::Wikilink,
128 line_number,
129 context: Some(truncate_context(line, 100)),
130 });
131 }
132
133 for cap in MARKDOWN_LINK_RE.captures_iter(line) {
135 let text = cap.get(1).map(|m| m.as_str()).unwrap_or("");
136 let url = cap.get(2).map(|m| m.as_str()).unwrap_or("");
137
138 if url.starts_with("http://") || url.starts_with("https://") {
140 continue;
141 }
142
143 if !url.ends_with(".md") && !is_likely_note_reference(url) {
145 continue;
146 }
147
148 links.push(ExtractedLink {
149 target: url.to_string(),
150 text: Some(text.to_string()),
151 link_type: LinkType::Markdown,
152 line_number,
153 context: Some(truncate_context(line, 100)),
154 });
155 }
156 }
157
158 links
159}
160
/// Heuristic for markdown link URLs that do not end in `.md`: does the URL
/// look like a reference to a note rather than to an asset?
///
/// A URL qualifies when the final segment of its path has no `.`, i.e. no
/// file extension. Anything with an extension (`.png`, `.pdf`, ...) is
/// treated as a non-note file. Only the final segment is inspected, so
/// relative prefixes and dotted directory names (`./other`, `../notes/other`,
/// `docs.v2/overview`) do not disqualify a link. A trailing `#fragment` is
/// ignored.
fn is_likely_note_reference(url: &str) -> bool {
    // The fragment is not part of the file path.
    let path = url.split('#').next().unwrap_or(url);

    // Trim trailing slashes first so `./` is judged by `.`, not by an empty
    // final segment.
    let name = path
        .trim_end_matches('/')
        .rsplit('/')
        .next()
        .unwrap_or(path);

    !name.contains('.')
}
182
183fn extract_frontmatter_links(fm: &Option<Frontmatter>) -> Vec<ExtractedLink> {
184 let mut links = Vec::new();
185
186 let fm = match fm {
187 Some(fm) => fm,
188 None => return links,
189 };
190
191 let ref_fields = ["project", "parent", "related", "blocks", "blocked_by"];
193
194 for field in &ref_fields {
195 if let Some(value) = fm.fields.get(*field) {
196 if let Some(s) = value.as_str() {
198 links.push(ExtractedLink {
199 target: s.to_string(),
200 text: Some(format!("{}: {}", field, s)),
201 link_type: LinkType::Frontmatter,
202 line_number: 0, context: None,
204 });
205 }
206 if let Some(arr) = value.as_sequence() {
208 for item in arr {
209 if let Some(s) = item.as_str() {
210 links.push(ExtractedLink {
211 target: s.to_string(),
212 text: Some(format!("{}: {}", field, s)),
213 link_type: LinkType::Frontmatter,
214 line_number: 0,
215 context: None,
216 });
217 }
218 }
219 }
220 }
221 }
222
223 links
224}
225
/// Shortens `line` to at most `max_len` bytes of its own text.
///
/// Lines that already fit are returned unchanged. Longer lines are cut at the
/// last UTF-8 character boundary at or before `max_len` and get `...`
/// appended, so the result can be up to three bytes longer than `max_len`.
fn truncate_context(line: &str, max_len: usize) -> String {
    if line.len() <= max_len {
        return line.to_owned();
    }

    // Index 0 is always a character boundary, so the search cannot fail.
    let cut = (0..=max_len)
        .rev()
        .find(|&idx| line.is_char_boundary(idx))
        .unwrap_or(0);

    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(&line[..cut]);
    shortened.push_str("...");
    shortened
}
239
// Unit tests that drive `extract_note` end to end on small inline notes.
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain, aliased and path-style wikilinks are all captured, in the order
    /// they appear in the body.
    #[test]
    fn test_extract_wikilinks() {
        let content = r#"---
title: Test Note
---
# Heading

This links to [[other-note]] and [[another|with alias]].
Also [[path/to/note]] works.
"#;
        let note = extract_note(content, Path::new("test.md"));

        assert_eq!(note.links.len(), 3);
        assert_eq!(note.links[0].target, "other-note");
        assert_eq!(note.links[0].text, None);
        assert_eq!(note.links[0].link_type, LinkType::Wikilink);

        // The part after `|` becomes the link text, not part of the target.
        assert_eq!(note.links[1].target, "another");
        assert_eq!(note.links[1].text, Some("with alias".to_string()));

        assert_eq!(note.links[2].target, "path/to/note");
    }

    /// Only the `.md` markdown link is kept; the external URL and the image
    /// are filtered out.
    #[test]
    fn test_extract_markdown_links() {
        let content = r#"# Note

See [this note](./other.md) for details.
Also [external](https://example.com) should be skipped.
And [image](./pic.png) should be skipped too.
"#;
        let note = extract_note(content, Path::new("test.md"));

        assert_eq!(note.links.len(), 1);
        assert_eq!(note.links[0].target, "./other.md");
        assert_eq!(note.links[0].text, Some("this note".to_string()));
        assert_eq!(note.links[0].link_type, LinkType::Markdown);
    }

    /// A scalar reference field yields one link and a list field yields one
    /// link per item.
    #[test]
    fn test_extract_frontmatter_links() {
        let content = r#"---
title: Task
type: task
project: my-project
related:
  - note-a
  - note-b
---
# Task content
"#;
        let note = extract_note(content, Path::new("task.md"));

        // Look only at links that came from frontmatter fields.
        let fm_links: Vec<_> =
            note.links.iter().filter(|l| l.link_type == LinkType::Frontmatter).collect();

        assert_eq!(fm_links.len(), 3);
        assert!(fm_links.iter().any(|l| l.target == "my-project"));
        assert!(fm_links.iter().any(|l| l.target == "note-a"));
        assert!(fm_links.iter().any(|l| l.target == "note-b"));
    }

    /// The frontmatter `title` wins over a heading in the body.
    #[test]
    fn test_extract_title_from_frontmatter() {
        let content = r#"---
title: My Title
---
# Heading
"#;
        let note = extract_note(content, Path::new("file.md"));
        assert_eq!(note.title, "My Title");
    }

    /// Without frontmatter, the first heading supplies the title.
    #[test]
    fn test_extract_title_from_heading() {
        let content = "# First Heading\n\nContent here.";
        let note = extract_note(content, Path::new("file.md"));
        assert_eq!(note.title, "First Heading");
    }

    /// With neither frontmatter nor heading, the file stem is used.
    #[test]
    fn test_extract_title_from_filename() {
        let content = "No frontmatter, no heading.";
        let note = extract_note(content, Path::new("my-note.md"));
        assert_eq!(note.title, "my-note");
    }

    /// The frontmatter `type` field is parsed into a `NoteType`.
    #[test]
    fn test_extract_note_type() {
        let content = r#"---
type: task
---
# Task
"#;
        let note = extract_note(content, Path::new("task.md"));
        assert_eq!(note.note_type, NoteType::Task);
    }

    /// A note without a `type` field gets `NoteType::None`.
    #[test]
    fn test_extract_note_type_default() {
        let content = "# Just a note";
        let note = extract_note(content, Path::new("note.md"));
        assert_eq!(note.note_type, NoteType::None);
    }

    /// Line numbers are 1-based positions within the body.
    #[test]
    fn test_line_numbers() {
        let content = r#"Line 1
Line 2 with [[link1]]
Line 3
Line 4 with [[link2]]
"#;
        let note = extract_note(content, Path::new("test.md"));

        assert_eq!(note.links.len(), 2);
        assert_eq!(note.links[0].line_number, 2);
        assert_eq!(note.links[1].line_number, 4);
    }

    /// A `#section` suffix stays part of the wikilink target.
    #[test]
    fn test_wikilink_with_section() {
        let content = "Link to [[note#section]] here.";
        let note = extract_note(content, Path::new("test.md"));

        assert_eq!(note.links.len(), 1);
        assert_eq!(note.links[0].target, "note#section");
    }
}